Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

author David S. Miller <davem@davemloft.net>
Mon, 24 Jan 2011 22:09:35 +0000 (14:09 -0800)
committer David S. Miller <davem@davemloft.net>
Mon, 24 Jan 2011 22:09:35 +0000 (14:09 -0800)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt

index b959659..ccb6048 100644 (file)
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -603,3 +603,10 @@ Why:       The adm9240, w83792d and w83793 hardware monitoring drivers have
  Who:   Jean Delvare <khali@linux-fr.org>
  
  ----------------------------
+
+What:  xt_connlimit rev 0
+When:  2012
+Who:   Jan Engelhardt <jengelh@medozas.de>
+Files: net/netfilter/xt_connlimit.c
+
+----------------------------
diff --git a/drivers/net/atl1c/atl1c_hw.c b/drivers/net/atl1c/atl1c_hw.c

index 1bf6720..23f2ab0 100644 (file)
--- a/drivers/net/atl1c/atl1c_hw.c
+++ b/drivers/net/atl1c/atl1c_hw.c
@@ -345,7 +345,7 @@ int atl1c_write_phy_reg(struct atl1c_hw *hw, u32 reg_addr, u16 phy_data)
   */
  static int atl1c_phy_setup_adv(struct atl1c_hw *hw)
  {
-       u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_SPEED_MASK;
+       u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_ALL;
         u16 mii_giga_ctrl_data = GIGA_CR_1000T_DEFAULT_CAP &
                                 ~GIGA_CR_1000T_SPEED_MASK;
  
@@ -373,7 +373,7 @@ static int atl1c_phy_setup_adv(struct atl1c_hw *hw)
         }
  
         if (atl1c_write_phy_reg(hw, MII_ADVERTISE, mii_adv_data) != 0 ||
-           atl1c_write_phy_reg(hw, MII_GIGA_CR, mii_giga_ctrl_data) != 0)
+           atl1c_write_phy_reg(hw, MII_CTRL1000, mii_giga_ctrl_data) != 0)
                 return -1;
         return 0;
  }
@@ -517,19 +517,18 @@ int atl1c_phy_init(struct atl1c_hw *hw)
                                         "Error Setting up Auto-Negotiation\n");
                         return ret_val;
                 }
-               mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG;
+               mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART;
                 break;
         case MEDIA_TYPE_100M_FULL:
-               mii_bmcr_data |= BMCR_SPEED_100 | BMCR_FULL_DUPLEX;
+               mii_bmcr_data |= BMCR_SPEED100 | BMCR_FULLDPLX;
                 break;
         case MEDIA_TYPE_100M_HALF:
-               mii_bmcr_data |= BMCR_SPEED_100;
+               mii_bmcr_data |= BMCR_SPEED100;
                 break;
         case MEDIA_TYPE_10M_FULL:
-               mii_bmcr_data |= BMCR_SPEED_10 | BMCR_FULL_DUPLEX;
+               mii_bmcr_data |= BMCR_FULLDPLX;
                 break;
         case MEDIA_TYPE_10M_HALF:
-               mii_bmcr_data |= BMCR_SPEED_10;
                 break;
         default:
                 if (netif_msg_link(adapter))
@@ -657,7 +656,7 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw)
         err = atl1c_phy_setup_adv(hw);
         if (err)
                 return err;
-       mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG;
+       mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART;
  
         return atl1c_write_phy_reg(hw, MII_BMCR, mii_bmcr_data);
  }
diff --git a/drivers/net/atl1c/atl1c_hw.h b/drivers/net/atl1c/atl1c_hw.h

index 3dd6759..655fc6c 100644 (file)
--- a/drivers/net/atl1c/atl1c_hw.h
+++ b/drivers/net/atl1c/atl1c_hw.h
@@ -736,55 +736,16 @@ int atl1c_phy_power_saving(struct atl1c_hw *hw);
  #define REG_DEBUG_DATA0                0x1900
  #define REG_DEBUG_DATA1                0x1904
  
-/* PHY Control Register */
-#define MII_BMCR                       0x00
-#define BMCR_SPEED_SELECT_MSB          0x0040  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define BMCR_COLL_TEST_ENABLE          0x0080  /* Collision test enable */
-#define BMCR_FULL_DUPLEX               0x0100  /* FDX =1, half duplex =0 */
-#define BMCR_RESTART_AUTO_NEG          0x0200  /* Restart auto negotiation */
-#define BMCR_ISOLATE                   0x0400  /* Isolate PHY from MII */
-#define BMCR_POWER_DOWN                        0x0800  /* Power down */
-#define BMCR_AUTO_NEG_EN               0x1000  /* Auto Neg Enable */
-#define BMCR_SPEED_SELECT_LSB          0x2000  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define BMCR_LOOPBACK                  0x4000  /* 0 = normal, 1 = loopback */
-#define BMCR_RESET                     0x8000  /* 0 = normal, 1 = PHY reset */
-#define BMCR_SPEED_MASK                        0x2040
-#define BMCR_SPEED_1000                        0x0040
-#define BMCR_SPEED_100                 0x2000
-#define BMCR_SPEED_10                  0x0000
-
-/* PHY Status Register */
-#define MII_BMSR                       0x01
-#define BMMSR_EXTENDED_CAPS            0x0001  /* Extended register capabilities */
-#define BMSR_JABBER_DETECT             0x0002  /* Jabber Detected */
-#define BMSR_LINK_STATUS               0x0004  /* Link Status 1 = link */
-#define BMSR_AUTONEG_CAPS              0x0008  /* Auto Neg Capable */
-#define BMSR_REMOTE_FAULT              0x0010  /* Remote Fault Detect */
-#define BMSR_AUTONEG_COMPLETE          0x0020  /* Auto Neg Complete */
-#define BMSR_PREAMBLE_SUPPRESS         0x0040  /* Preamble may be suppressed */
-#define BMSR_EXTENDED_STATUS           0x0100  /* Ext. status info in Reg 0x0F */
-#define BMSR_100T2_HD_CAPS             0x0200  /* 100T2 Half Duplex Capable */
-#define BMSR_100T2_FD_CAPS             0x0400  /* 100T2 Full Duplex Capable */
-#define BMSR_10T_HD_CAPS               0x0800  /* 10T   Half Duplex Capable */
-#define BMSR_10T_FD_CAPS               0x1000  /* 10T   Full Duplex Capable */
-#define BMSR_100X_HD_CAPS              0x2000  /* 100X  Half Duplex Capable */
-#define BMMII_SR_100X_FD_CAPS          0x4000  /* 100X  Full Duplex Capable */
-#define BMMII_SR_100T4_CAPS            0x8000  /* 100T4 Capable */
-
-#define MII_PHYSID1                    0x02
-#define MII_PHYSID2                    0x03
  #define L1D_MPW_PHYID1                 0xD01C  /* V7 */
  #define L1D_MPW_PHYID2                 0xD01D  /* V1-V6 */
  #define L1D_MPW_PHYID3                 0xD01E  /* V8 */
  
  
  /* Autoneg Advertisement Register */
-#define MII_ADVERTISE                  0x04
-#define ADVERTISE_SPEED_MASK           0x01E0
-#define ADVERTISE_DEFAULT_CAP          0x0DE0
+#define ADVERTISE_DEFAULT_CAP \
+       (ADVERTISE_ALL | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)
  
  /* 1000BASE-T Control Register */
-#define MII_GIGA_CR                    0x09
  #define GIGA_CR_1000T_REPEATER_DTE     0x0400  /* 1=Repeater/switch device port 0=DTE device */
  
  #define GIGA_CR_1000T_MS_VALUE         0x0800  /* 1=Configure PHY as Master 0=Configure PHY as Slave */
diff --git a/drivers/net/atl1e/atl1e_ethtool.c b/drivers/net/atl1e/atl1e_ethtool.c

index 6943a6c..1209297 100644 (file)
--- a/drivers/net/atl1e/atl1e_ethtool.c
+++ b/drivers/net/atl1e/atl1e_ethtool.c
@@ -95,18 +95,18 @@ static int atl1e_set_settings(struct net_device *netdev,
                 ecmd->advertising = hw->autoneg_advertised |
                                     ADVERTISED_TP | ADVERTISED_Autoneg;
  
-               adv4 = hw->mii_autoneg_adv_reg & ~MII_AR_SPEED_MASK;
+               adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL;
                 adv9 = hw->mii_1000t_ctrl_reg & ~MII_AT001_CR_1000T_SPEED_MASK;
                 if (hw->autoneg_advertised & ADVERTISE_10_HALF)
-                       adv4 |= MII_AR_10T_HD_CAPS;
+                       adv4 |= ADVERTISE_10HALF;
                 if (hw->autoneg_advertised & ADVERTISE_10_FULL)
-                       adv4 |= MII_AR_10T_FD_CAPS;
+                       adv4 |= ADVERTISE_10FULL;
                 if (hw->autoneg_advertised & ADVERTISE_100_HALF)
-                       adv4 |= MII_AR_100TX_HD_CAPS;
+                       adv4 |= ADVERTISE_100HALF;
                 if (hw->autoneg_advertised & ADVERTISE_100_FULL)
-                       adv4 |= MII_AR_100TX_FD_CAPS;
+                       adv4 |= ADVERTISE_100FULL;
                 if (hw->autoneg_advertised & ADVERTISE_1000_FULL)
-                       adv9 |= MII_AT001_CR_1000T_FD_CAPS;
+                       adv9 |= ADVERTISE_1000FULL;
  
                 if (adv4 != hw->mii_autoneg_adv_reg ||
                                 adv9 != hw->mii_1000t_ctrl_reg) {
diff --git a/drivers/net/atl1e/atl1e_hw.c b/drivers/net/atl1e/atl1e_hw.c

index 76cc043..923063d 100644 (file)
--- a/drivers/net/atl1e/atl1e_hw.c
+++ b/drivers/net/atl1e/atl1e_hw.c
@@ -318,7 +318,7 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw)
          * Advertisement Register (Address 4) and the 1000 mb speed bits in
          * the  1000Base-T control Register (Address 9).
          */
-       mii_autoneg_adv_reg &= ~MII_AR_SPEED_MASK;
+       mii_autoneg_adv_reg &= ~ADVERTISE_ALL;
         mii_1000t_ctrl_reg  &= ~MII_AT001_CR_1000T_SPEED_MASK;
  
         /*
@@ -327,44 +327,37 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw)
          */
         switch (hw->media_type) {
         case MEDIA_TYPE_AUTO_SENSOR:
-               mii_autoneg_adv_reg |= (MII_AR_10T_HD_CAPS   |
-                                       MII_AR_10T_FD_CAPS   |
-                                       MII_AR_100TX_HD_CAPS |
-                                       MII_AR_100TX_FD_CAPS);
-               hw->autoneg_advertised = ADVERTISE_10_HALF  |
-                                        ADVERTISE_10_FULL  |
-                                        ADVERTISE_100_HALF |
-                                        ADVERTISE_100_FULL;
+               mii_autoneg_adv_reg |= ADVERTISE_ALL;
+               hw->autoneg_advertised = ADVERTISE_ALL;
                 if (hw->nic_type == athr_l1e) {
-                       mii_1000t_ctrl_reg |=
-                               MII_AT001_CR_1000T_FD_CAPS;
+                       mii_1000t_ctrl_reg |= ADVERTISE_1000FULL;
                         hw->autoneg_advertised |= ADVERTISE_1000_FULL;
                 }
                 break;
  
         case MEDIA_TYPE_100M_FULL:
-               mii_autoneg_adv_reg   |= MII_AR_100TX_FD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_100FULL;
                 hw->autoneg_advertised = ADVERTISE_100_FULL;
                 break;
  
         case MEDIA_TYPE_100M_HALF:
-               mii_autoneg_adv_reg   |= MII_AR_100TX_HD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_100_HALF;
                 hw->autoneg_advertised = ADVERTISE_100_HALF;
                 break;
  
         case MEDIA_TYPE_10M_FULL:
-               mii_autoneg_adv_reg   |= MII_AR_10T_FD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_10_FULL;
                 hw->autoneg_advertised = ADVERTISE_10_FULL;
                 break;
  
         default:
-               mii_autoneg_adv_reg   |= MII_AR_10T_HD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_10_HALF;
                 hw->autoneg_advertised = ADVERTISE_10_HALF;
                 break;
         }
  
         /* flow control fixed to enable all */
-       mii_autoneg_adv_reg |= (MII_AR_ASM_DIR | MII_AR_PAUSE);
+       mii_autoneg_adv_reg |= (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP);
  
         hw->mii_autoneg_adv_reg = mii_autoneg_adv_reg;
         hw->mii_1000t_ctrl_reg  = mii_1000t_ctrl_reg;
@@ -374,7 +367,7 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw)
                 return ret_val;
  
         if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) {
-               ret_val = atl1e_write_phy_reg(hw, MII_AT001_CR,
+               ret_val = atl1e_write_phy_reg(hw, MII_CTRL1000,
                                            mii_1000t_ctrl_reg);
                 if (ret_val)
                         return ret_val;
@@ -397,7 +390,7 @@ int atl1e_phy_commit(struct atl1e_hw *hw)
         int ret_val;
         u16 phy_data;
  
-       phy_data = MII_CR_RESET | MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG;
+       phy_data = BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART;
  
         ret_val = atl1e_write_phy_reg(hw, MII_BMCR, phy_data);
         if (ret_val) {
@@ -645,15 +638,14 @@ int atl1e_restart_autoneg(struct atl1e_hw *hw)
                 return err;
  
         if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) {
-               err = atl1e_write_phy_reg(hw, MII_AT001_CR,
+               err = atl1e_write_phy_reg(hw, MII_CTRL1000,
                                        hw->mii_1000t_ctrl_reg);
                 if (err)
                         return err;
         }
  
         err = atl1e_write_phy_reg(hw, MII_BMCR,
-                       MII_CR_RESET | MII_CR_AUTO_NEG_EN |
-                       MII_CR_RESTART_AUTO_NEG);
+                       BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
         return err;
  }
  
diff --git a/drivers/net/atl1e/atl1e_hw.h b/drivers/net/atl1e/atl1e_hw.h

index 5ea2f4d..74df16a 100644 (file)
--- a/drivers/net/atl1e/atl1e_hw.h
+++ b/drivers/net/atl1e/atl1e_hw.h
@@ -629,127 +629,24 @@ s32 atl1e_restart_autoneg(struct atl1e_hw *hw);
  
  /***************************** MII definition ***************************************/
  /* PHY Common Register */
-#define MII_BMCR                        0x00
-#define MII_BMSR                        0x01
-#define MII_PHYSID1                     0x02
-#define MII_PHYSID2                     0x03
-#define MII_ADVERTISE                   0x04
-#define MII_LPA                         0x05
-#define MII_EXPANSION                   0x06
-#define MII_AT001_CR                    0x09
-#define MII_AT001_SR                    0x0A
-#define MII_AT001_ESR                   0x0F
  #define MII_AT001_PSCR                  0x10
  #define MII_AT001_PSSR                  0x11
  #define MII_INT_CTRL                    0x12
  #define MII_INT_STATUS                  0x13
  #define MII_SMARTSPEED                  0x14
-#define MII_RERRCOUNTER                 0x15
-#define MII_SREVISION                   0x16
-#define MII_RESV1                       0x17
  #define MII_LBRERROR                    0x18
-#define MII_PHYADDR                     0x19
  #define MII_RESV2                       0x1a
-#define MII_TPISTATUS                   0x1b
-#define MII_NCONFIG                     0x1c
  
  #define MII_DBG_ADDR                   0x1D
  #define MII_DBG_DATA                   0x1E
  
-
-/* PHY Control Register */
-#define MII_CR_SPEED_SELECT_MSB                  0x0040  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_COLL_TEST_ENABLE                  0x0080  /* Collision test enable */
-#define MII_CR_FULL_DUPLEX                       0x0100  /* FDX =1, half duplex =0 */
-#define MII_CR_RESTART_AUTO_NEG                  0x0200  /* Restart auto negotiation */
-#define MII_CR_ISOLATE                           0x0400  /* Isolate PHY from MII */
-#define MII_CR_POWER_DOWN                        0x0800  /* Power down */
-#define MII_CR_AUTO_NEG_EN                       0x1000  /* Auto Neg Enable */
-#define MII_CR_SPEED_SELECT_LSB                  0x2000  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_LOOPBACK                          0x4000  /* 0 = normal, 1 = loopback */
-#define MII_CR_RESET                             0x8000  /* 0 = normal, 1 = PHY reset */
-#define MII_CR_SPEED_MASK                        0x2040
-#define MII_CR_SPEED_1000                        0x0040
-#define MII_CR_SPEED_100                         0x2000
-#define MII_CR_SPEED_10                          0x0000
-
-
-/* PHY Status Register */
-#define MII_SR_EXTENDED_CAPS                     0x0001  /* Extended register capabilities */
-#define MII_SR_JABBER_DETECT                     0x0002  /* Jabber Detected */
-#define MII_SR_LINK_STATUS                       0x0004  /* Link Status 1 = link */
-#define MII_SR_AUTONEG_CAPS                      0x0008  /* Auto Neg Capable */
-#define MII_SR_REMOTE_FAULT                      0x0010  /* Remote Fault Detect */
-#define MII_SR_AUTONEG_COMPLETE                  0x0020  /* Auto Neg Complete */
-#define MII_SR_PREAMBLE_SUPPRESS                 0x0040  /* Preamble may be suppressed */
-#define MII_SR_EXTENDED_STATUS                   0x0100  /* Ext. status info in Reg 0x0F */
-#define MII_SR_100T2_HD_CAPS                     0x0200  /* 100T2 Half Duplex Capable */
-#define MII_SR_100T2_FD_CAPS                     0x0400  /* 100T2 Full Duplex Capable */
-#define MII_SR_10T_HD_CAPS                       0x0800  /* 10T   Half Duplex Capable */
-#define MII_SR_10T_FD_CAPS                       0x1000  /* 10T   Full Duplex Capable */
-#define MII_SR_100X_HD_CAPS                      0x2000  /* 100X  Half Duplex Capable */
-#define MII_SR_100X_FD_CAPS                      0x4000  /* 100X  Full Duplex Capable */
-#define MII_SR_100T4_CAPS                        0x8000  /* 100T4 Capable */
-
-/* Link partner ability register. */
-#define MII_LPA_SLCT                             0x001f  /* Same as advertise selector  */
-#define MII_LPA_10HALF                           0x0020  /* Can do 10mbps half-duplex   */
-#define MII_LPA_10FULL                           0x0040  /* Can do 10mbps full-duplex   */
-#define MII_LPA_100HALF                          0x0080  /* Can do 100mbps half-duplex  */
-#define MII_LPA_100FULL                          0x0100  /* Can do 100mbps full-duplex  */
-#define MII_LPA_100BASE4                         0x0200  /* 100BASE-T4  */
-#define MII_LPA_PAUSE                            0x0400  /* PAUSE */
-#define MII_LPA_ASYPAUSE                         0x0800  /* Asymmetrical PAUSE */
-#define MII_LPA_RFAULT                           0x2000  /* Link partner faulted        */
-#define MII_LPA_LPACK                            0x4000  /* Link partner acked us       */
-#define MII_LPA_NPAGE                            0x8000  /* Next page bit               */
-
  /* Autoneg Advertisement Register */
-#define MII_AR_SELECTOR_FIELD                   0x0001  /* indicates IEEE 802.3 CSMA/CD */
-#define MII_AR_10T_HD_CAPS                      0x0020  /* 10T   Half Duplex Capable */
-#define MII_AR_10T_FD_CAPS                      0x0040  /* 10T   Full Duplex Capable */
-#define MII_AR_100TX_HD_CAPS                    0x0080  /* 100TX Half Duplex Capable */
-#define MII_AR_100TX_FD_CAPS                    0x0100  /* 100TX Full Duplex Capable */
-#define MII_AR_100T4_CAPS                       0x0200  /* 100T4 Capable */
-#define MII_AR_PAUSE                            0x0400  /* Pause operation desired */
-#define MII_AR_ASM_DIR                          0x0800  /* Asymmetric Pause Direction bit */
-#define MII_AR_REMOTE_FAULT                     0x2000  /* Remote Fault detected */
-#define MII_AR_NEXT_PAGE                        0x8000  /* Next Page ability supported */
-#define MII_AR_SPEED_MASK                       0x01E0
-#define MII_AR_DEFAULT_CAP_MASK                 0x0DE0
+#define MII_AR_DEFAULT_CAP_MASK                 0
  
  /* 1000BASE-T Control Register */
-#define MII_AT001_CR_1000T_HD_CAPS              0x0100  /* Advertise 1000T HD capability */
-#define MII_AT001_CR_1000T_FD_CAPS              0x0200  /* Advertise 1000T FD capability  */
-#define MII_AT001_CR_1000T_REPEATER_DTE         0x0400  /* 1=Repeater/switch device port */
-/* 0=DTE device */
-#define MII_AT001_CR_1000T_MS_VALUE             0x0800  /* 1=Configure PHY as Master */
-/* 0=Configure PHY as Slave */
-#define MII_AT001_CR_1000T_MS_ENABLE            0x1000  /* 1=Master/Slave manual config value */
-/* 0=Automatic Master/Slave config */
-#define MII_AT001_CR_1000T_TEST_MODE_NORMAL     0x0000  /* Normal Operation */
-#define MII_AT001_CR_1000T_TEST_MODE_1          0x2000  /* Transmit Waveform test */
-#define MII_AT001_CR_1000T_TEST_MODE_2          0x4000  /* Master Transmit Jitter test */
-#define MII_AT001_CR_1000T_TEST_MODE_3          0x6000  /* Slave Transmit Jitter test */
-#define MII_AT001_CR_1000T_TEST_MODE_4          0x8000  /* Transmitter Distortion test */
-#define MII_AT001_CR_1000T_SPEED_MASK           0x0300
-#define MII_AT001_CR_1000T_DEFAULT_CAP_MASK     0x0300
-
-/* 1000BASE-T Status Register */
-#define MII_AT001_SR_1000T_LP_HD_CAPS           0x0400  /* LP is 1000T HD capable */
-#define MII_AT001_SR_1000T_LP_FD_CAPS           0x0800  /* LP is 1000T FD capable */
-#define MII_AT001_SR_1000T_REMOTE_RX_STATUS     0x1000  /* Remote receiver OK */
-#define MII_AT001_SR_1000T_LOCAL_RX_STATUS      0x2000  /* Local receiver OK */
-#define MII_AT001_SR_1000T_MS_CONFIG_RES        0x4000  /* 1=Local TX is Master, 0=Slave */
-#define MII_AT001_SR_1000T_MS_CONFIG_FAULT      0x8000  /* Master/Slave config fault */
-#define MII_AT001_SR_1000T_REMOTE_RX_STATUS_SHIFT   12
-#define MII_AT001_SR_1000T_LOCAL_RX_STATUS_SHIFT    13
-
-/* Extended Status Register */
-#define MII_AT001_ESR_1000T_HD_CAPS             0x1000  /* 1000T HD capable */
-#define MII_AT001_ESR_1000T_FD_CAPS             0x2000  /* 1000T FD capable */
-#define MII_AT001_ESR_1000X_HD_CAPS             0x4000  /* 1000X HD capable */
-#define MII_AT001_ESR_1000X_FD_CAPS             0x8000  /* 1000X FD capable */
+#define MII_AT001_CR_1000T_SPEED_MASK \
+       (ADVERTISE_1000FULL | ADVERTISE_1000HALF)
+#define MII_AT001_CR_1000T_DEFAULT_CAP_MASK    MII_AT001_CR_1000T_SPEED_MASK
  
  /* AT001 PHY Specific Control Register */
  #define MII_AT001_PSCR_JABBER_DISABLE           0x0001  /* 1=Jabber Function disabled */
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c

index e28f8ba..bf7500c 100644 (file)
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -2051,9 +2051,9 @@ static int atl1e_suspend(struct pci_dev *pdev, pm_message_t state)
                 atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
                 atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
  
-               mii_advertise_data = MII_AR_10T_HD_CAPS;
+               mii_advertise_data = ADVERTISE_10HALF;
  
-               if ((atl1e_write_phy_reg(hw, MII_AT001_CR, 0) != 0) ||
+               if ((atl1e_write_phy_reg(hw, MII_CTRL1000, 0) != 0) ||
                     (atl1e_write_phy_reg(hw,
                            MII_ADVERTISE, mii_advertise_data) != 0) ||
                     (atl1e_phy_commit(hw)) != 0) {
diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h

index e610e13..00bf595 100644 (file)
--- a/drivers/net/e1000e/e1000.h
+++ b/drivers/net/e1000e/e1000.h
@@ -364,6 +364,7 @@ struct e1000_adapter {
         /* structs defined in e1000_hw.h */
         struct e1000_hw hw;
  
+       spinlock_t stats64_lock;
         struct e1000_hw_stats stats;
         struct e1000_phy_info phy_info;
         struct e1000_phy_stats phy_stats;
@@ -494,7 +495,9 @@ extern int e1000e_setup_rx_resources(struct e1000_adapter *adapter);
  extern int e1000e_setup_tx_resources(struct e1000_adapter *adapter);
  extern void e1000e_free_rx_resources(struct e1000_adapter *adapter);
  extern void e1000e_free_tx_resources(struct e1000_adapter *adapter);
-extern void e1000e_update_stats(struct e1000_adapter *adapter);
+extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev,
+                                                    struct rtnl_link_stats64
+                                                    *stats);
  extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter);
  extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter);
  extern void e1000e_get_hw_control(struct e1000_adapter *adapter);
diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c

index fa08b63..daa7fe4 100644 (file)
--- a/drivers/net/e1000e/ethtool.c
+++ b/drivers/net/e1000e/ethtool.c
@@ -46,15 +46,15 @@ struct e1000_stats {
  };
  
  #define E1000_STAT(str, m) { \
-                       .stat_string = str, \
-                       .type = E1000_STATS, \
-                       .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
-                       .stat_offset = offsetof(struct e1000_adapter, m) }
+               .stat_string = str, \
+               .type = E1000_STATS, \
+               .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
+               .stat_offset = offsetof(struct e1000_adapter, m) }
  #define E1000_NETDEV_STAT(str, m) { \
-                       .stat_string = str, \
-                       .type = NETDEV_STATS, \
-                       .sizeof_stat = sizeof(((struct net_device *)0)->m), \
-                       .stat_offset = offsetof(struct net_device, m) }
+               .stat_string = str, \
+               .type = NETDEV_STATS, \
+               .sizeof_stat = sizeof(((struct rtnl_link_stats64 *)0)->m), \
+               .stat_offset = offsetof(struct rtnl_link_stats64, m) }
  
  static const struct e1000_stats e1000_gstrings_stats[] = {
         E1000_STAT("rx_packets", stats.gprc),
@@ -65,21 +65,21 @@ static const struct e1000_stats e1000_gstrings_stats[] = {
         E1000_STAT("tx_broadcast", stats.bptc),
         E1000_STAT("rx_multicast", stats.mprc),
         E1000_STAT("tx_multicast", stats.mptc),
-       E1000_NETDEV_STAT("rx_errors", stats.rx_errors),
-       E1000_NETDEV_STAT("tx_errors", stats.tx_errors),
-       E1000_NETDEV_STAT("tx_dropped", stats.tx_dropped),
+       E1000_NETDEV_STAT("rx_errors", rx_errors),
+       E1000_NETDEV_STAT("tx_errors", tx_errors),
+       E1000_NETDEV_STAT("tx_dropped", tx_dropped),
         E1000_STAT("multicast", stats.mprc),
         E1000_STAT("collisions", stats.colc),
-       E1000_NETDEV_STAT("rx_length_errors", stats.rx_length_errors),
-       E1000_NETDEV_STAT("rx_over_errors", stats.rx_over_errors),
+       E1000_NETDEV_STAT("rx_length_errors", rx_length_errors),
+       E1000_NETDEV_STAT("rx_over_errors", rx_over_errors),
         E1000_STAT("rx_crc_errors", stats.crcerrs),
-       E1000_NETDEV_STAT("rx_frame_errors", stats.rx_frame_errors),
+       E1000_NETDEV_STAT("rx_frame_errors", rx_frame_errors),
         E1000_STAT("rx_no_buffer_count", stats.rnbc),
         E1000_STAT("rx_missed_errors", stats.mpc),
         E1000_STAT("tx_aborted_errors", stats.ecol),
         E1000_STAT("tx_carrier_errors", stats.tncrs),
-       E1000_NETDEV_STAT("tx_fifo_errors", stats.tx_fifo_errors),
-       E1000_NETDEV_STAT("tx_heartbeat_errors", stats.tx_heartbeat_errors),
+       E1000_NETDEV_STAT("tx_fifo_errors", tx_fifo_errors),
+       E1000_NETDEV_STAT("tx_heartbeat_errors", tx_heartbeat_errors),
         E1000_STAT("tx_window_errors", stats.latecol),
         E1000_STAT("tx_abort_late_coll", stats.latecol),
         E1000_STAT("tx_deferred_ok", stats.dc),
@@ -684,20 +684,13 @@ static int e1000_set_ringparam(struct net_device *netdev,
         rx_old = adapter->rx_ring;
  
         err = -ENOMEM;
-       tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
+       tx_ring = kmemdup(tx_old, sizeof(struct e1000_ring), GFP_KERNEL);
         if (!tx_ring)
                 goto err_alloc_tx;
-       /*
-        * use a memcpy to save any previously configured
-        * items like napi structs from having to be
-        * reinitialized
-        */
-       memcpy(tx_ring, tx_old, sizeof(struct e1000_ring));
  
-       rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
+       rx_ring = kmemdup(rx_old, sizeof(struct e1000_ring), GFP_KERNEL);
         if (!rx_ring)
                 goto err_alloc_rx;
-       memcpy(rx_ring, rx_old, sizeof(struct e1000_ring));
  
         adapter->tx_ring = tx_ring;
         adapter->rx_ring = rx_ring;
@@ -1255,7 +1248,6 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
  {
         struct e1000_hw *hw = &adapter->hw;
         u32 ctrl_reg = 0;
-       u32 stat_reg = 0;
         u16 phy_reg = 0;
         s32 ret_val = 0;
  
@@ -1363,8 +1355,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
                  * Set the ILOS bit on the fiber Nic if half duplex link is
                  * detected.
                  */
-               stat_reg = er32(STATUS);
-               if ((stat_reg & E1000_STATUS_FD) == 0)
+               if ((er32(STATUS) & E1000_STATUS_FD) == 0)
                         ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU);
         }
  
@@ -1982,14 +1973,15 @@ static void e1000_get_ethtool_stats(struct net_device *netdev,
                                     u64 *data)
  {
         struct e1000_adapter *adapter = netdev_priv(netdev);
+       struct rtnl_link_stats64 net_stats;
         int i;
         char *p = NULL;
  
-       e1000e_update_stats(adapter);
+       e1000e_get_stats64(netdev, &net_stats);
         for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
                 switch (e1000_gstrings_stats[i].type) {
                 case NETDEV_STATS:
-                       p = (char *) netdev +
+                       p = (char *) &net_stats +
                                         e1000_gstrings_stats[i].stat_offset;
                         break;
                 case E1000_STATS:
diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c

index fb46974..232b42b 100644 (file)
--- a/drivers/net/e1000e/ich8lan.c
+++ b/drivers/net/e1000e/ich8lan.c
@@ -2104,7 +2104,6 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
  {
         union ich8_hws_flash_status hsfsts;
         s32 ret_val = -E1000_ERR_NVM;
-       s32 i = 0;
  
         hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
  
@@ -2140,6 +2139,8 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
                 ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval);
                 ret_val = 0;
         } else {
+               s32 i = 0;
+
                 /*
                  * Otherwise poll for sometime so the current
                  * cycle has a chance to end before giving up.
diff --git a/drivers/net/e1000e/lib.c b/drivers/net/e1000e/lib.c

index 68aa174..96921de 100644 (file)
--- a/drivers/net/e1000e/lib.c
+++ b/drivers/net/e1000e/lib.c
@@ -1978,15 +1978,15 @@ static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
  {
         struct e1000_nvm_info *nvm = &hw->nvm;
         u32 eecd = er32(EECD);
-       u16 timeout = 0;
         u8 spi_stat_reg;
  
         if (nvm->type == e1000_nvm_eeprom_spi) {
+               u16 timeout = NVM_MAX_RETRY_SPI;
+
                 /* Clear SK and CS */
                 eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
                 ew32(EECD, eecd);
                 udelay(1);
-               timeout = NVM_MAX_RETRY_SPI;
  
                 /*
                  * Read "Status Register" repeatedly until the LSB is cleared.
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c

index 1c18f26..5b916b0 100644 (file)
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -900,8 +900,6 @@ next_desc:
  
         adapter->total_rx_bytes += total_rx_bytes;
         adapter->total_rx_packets += total_rx_packets;
-       netdev->stats.rx_bytes += total_rx_bytes;
-       netdev->stats.rx_packets += total_rx_packets;
         return cleaned;
  }
  
@@ -1057,8 +1055,6 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
         }
         adapter->total_tx_bytes += total_tx_bytes;
         adapter->total_tx_packets += total_tx_packets;
-       netdev->stats.tx_bytes += total_tx_bytes;
-       netdev->stats.tx_packets += total_tx_packets;
         return count < tx_ring->count;
  }
  
@@ -1245,8 +1241,6 @@ next_desc:
  
         adapter->total_rx_bytes += total_rx_bytes;
         adapter->total_rx_packets += total_rx_packets;
-       netdev->stats.rx_bytes += total_rx_bytes;
-       netdev->stats.rx_packets += total_rx_packets;
         return cleaned;
  }
  
@@ -1426,8 +1420,6 @@ next_desc:
  
         adapter->total_rx_bytes += total_rx_bytes;
         adapter->total_rx_packets += total_rx_packets;
-       netdev->stats.rx_bytes += total_rx_bytes;
-       netdev->stats.rx_packets += total_rx_packets;
         return cleaned;
  }
  
@@ -2728,7 +2720,6 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
  {
         struct e1000_hw *hw = &adapter->hw;
         u32 rctl, rfctl;
-       u32 psrctl = 0;
         u32 pages = 0;
  
         /* Workaround Si errata on 82579 - configure jumbo frame flow */
@@ -2827,6 +2818,8 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
                 adapter->rx_ps_pages = 0;
  
         if (adapter->rx_ps_pages) {
+               u32 psrctl = 0;
+
                 /* Configure extra packet-split registers */
                 rfctl = er32(RFCTL);
                 rfctl |= E1000_RFCTL_EXTEN;
@@ -3028,7 +3021,6 @@ static void e1000_set_multi(struct net_device *netdev)
         struct netdev_hw_addr *ha;
         u8  *mta_list;
         u32 rctl;
-       int i;
  
         /* Check for Promiscuous and All Multicast modes */
  
@@ -3051,12 +3043,13 @@ static void e1000_set_multi(struct net_device *netdev)
         ew32(RCTL, rctl);
  
         if (!netdev_mc_empty(netdev)) {
+               int i = 0;
+
                 mta_list = kmalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
                 if (!mta_list)
                         return;
  
                 /* prepare a packed array of only addresses. */
-               i = 0;
                 netdev_for_each_mc_addr(ha, netdev)
                         memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
  
@@ -3338,6 +3331,8 @@ int e1000e_up(struct e1000_adapter *adapter)
         return 0;
  }
  
+static void e1000e_update_stats(struct e1000_adapter *adapter);
+
  void e1000e_down(struct e1000_adapter *adapter)
  {
         struct net_device *netdev = adapter->netdev;
@@ -3372,6 +3367,11 @@ void e1000e_down(struct e1000_adapter *adapter)
         del_timer_sync(&adapter->phy_info_timer);
  
         netif_carrier_off(netdev);
+
+       spin_lock(&adapter->stats64_lock);
+       e1000e_update_stats(adapter);
+       spin_unlock(&adapter->stats64_lock);
+
         adapter->link_speed = 0;
         adapter->link_duplex = 0;
  
@@ -3413,6 +3413,8 @@ static int __devinit e1000_sw_init(struct e1000_adapter *adapter)
         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
  
+       spin_lock_init(&adapter->stats64_lock);
+
         e1000e_set_interrupt_capability(adapter);
  
         if (e1000_alloc_queues(adapter))
@@ -3886,7 +3888,7 @@ release:
   * e1000e_update_stats - Update the board statistics counters
   * @adapter: board private structure
   **/
-void e1000e_update_stats(struct e1000_adapter *adapter)
+static void e1000e_update_stats(struct e1000_adapter *adapter)
  {
         struct net_device *netdev = adapter->netdev;
         struct e1000_hw *hw = &adapter->hw;
@@ -3998,10 +4000,11 @@ static void e1000_phy_read_status(struct e1000_adapter *adapter)
  {
         struct e1000_hw *hw = &adapter->hw;
         struct e1000_phy_regs *phy = &adapter->phy_regs;
-       int ret_val;
  
         if ((er32(STATUS) & E1000_STATUS_LU) &&
             (adapter->hw.phy.media_type == e1000_media_type_copper)) {
+               int ret_val;
+
                 ret_val  = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr);
                 ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr);
                 ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise);
@@ -4147,7 +4150,6 @@ static void e1000_watchdog_task(struct work_struct *work)
         struct e1000_ring *tx_ring = adapter->tx_ring;
         struct e1000_hw *hw = &adapter->hw;
         u32 link, tctl;
-       int tx_pending = 0;
  
         link = e1000e_has_link(adapter);
         if ((netif_carrier_ok(netdev)) && link) {
@@ -4285,7 +4287,9 @@ static void e1000_watchdog_task(struct work_struct *work)
         }
  
  link_up:
+       spin_lock(&adapter->stats64_lock);
         e1000e_update_stats(adapter);
+       spin_unlock(&adapter->stats64_lock);
  
         mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
         adapter->tpt_old = adapter->stats.tpt;
@@ -4299,21 +4303,18 @@ link_up:
  
         e1000e_update_adaptive(&adapter->hw);
  
-       if (!netif_carrier_ok(netdev)) {
-               tx_pending = (e1000_desc_unused(tx_ring) + 1 <
-                              tx_ring->count);
-               if (tx_pending) {
-                       /*
-                        * We've lost link, so the controller stops DMA,
-                        * but we've got queued Tx work that's never going
-                        * to get done, so reset controller to flush Tx.
-                        * (Do the reset outside of interrupt context).
-                        */
-                       adapter->tx_timeout_count++;
-                       schedule_work(&adapter->reset_task);
-                       /* return immediately since reset is imminent */
-                       return;
-               }
+       if (!netif_carrier_ok(netdev) &&
+           (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) {
+               /*
+                * We've lost link, so the controller stops DMA,
+                * but we've got queued Tx work that's never going
+                * to get done, so reset controller to flush Tx.
+                * (Do the reset outside of interrupt context).
+                */
+               adapter->tx_timeout_count++;
+               schedule_work(&adapter->reset_task);
+               /* return immediately since reset is imminent */
+               return;
         }
  
         /* Simple mode for Interrupt Throttle Rate (ITR) */
@@ -4384,13 +4385,13 @@ static int e1000_tso(struct e1000_adapter *adapter,
         u32 cmd_length = 0;
         u16 ipcse = 0, tucse, mss;
         u8 ipcss, ipcso, tucss, tucso, hdr_len;
-       int err;
  
         if (!skb_is_gso(skb))
                 return 0;
  
         if (skb_header_cloned(skb)) {
-               err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+               int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+
                 if (err)
                         return err;
         }
@@ -4897,16 +4898,55 @@ static void e1000_reset_task(struct work_struct *work)
  }
  
  /**
- * e1000_get_stats - Get System Network Statistics
+ * e1000e_get_stats64 - Get System Network Statistics
   * @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
   *
   * Returns the address of the device statistics structure.
- * The statistics are actually updated from the timer callback.
   **/
-static struct net_device_stats *e1000_get_stats(struct net_device *netdev)
+struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev,
+                                             struct rtnl_link_stats64 *stats)
  {
-       /* only return the current stats */
-       return &netdev->stats;
+       struct e1000_adapter *adapter = netdev_priv(netdev);
+
+       memset(stats, 0, sizeof(struct rtnl_link_stats64));
+       spin_lock(&adapter->stats64_lock);
+       e1000e_update_stats(adapter);
+       /* Fill out the OS statistics structure */
+       stats->rx_bytes = adapter->stats.gorc;
+       stats->rx_packets = adapter->stats.gprc;
+       stats->tx_bytes = adapter->stats.gotc;
+       stats->tx_packets = adapter->stats.gptc;
+       stats->multicast = adapter->stats.mprc;
+       stats->collisions = adapter->stats.colc;
+
+       /* Rx Errors */
+
+       /*
+        * RLEC on some newer hardware can be incorrect so build
+        * our own version based on RUC and ROC
+        */
+       stats->rx_errors = adapter->stats.rxerrc +
+               adapter->stats.crcerrs + adapter->stats.algnerrc +
+               adapter->stats.ruc + adapter->stats.roc +
+               adapter->stats.cexterr;
+       stats->rx_length_errors = adapter->stats.ruc +
+                                             adapter->stats.roc;
+       stats->rx_crc_errors = adapter->stats.crcerrs;
+       stats->rx_frame_errors = adapter->stats.algnerrc;
+       stats->rx_missed_errors = adapter->stats.mpc;
+
+       /* Tx Errors */
+       stats->tx_errors = adapter->stats.ecol +
+                                      adapter->stats.latecol;
+       stats->tx_aborted_errors = adapter->stats.ecol;
+       stats->tx_window_errors = adapter->stats.latecol;
+       stats->tx_carrier_errors = adapter->stats.tncrs;
+
+       /* Tx Dropped needs to be maintained elsewhere */
+
+       spin_unlock(&adapter->stats64_lock);
+       return stats;
  }
  
  /**
@@ -5476,9 +5516,10 @@ static irqreturn_t e1000_intr_msix(int irq, void *data)
  {
         struct net_device *netdev = data;
         struct e1000_adapter *adapter = netdev_priv(netdev);
-       int vector, msix_irq;
  
         if (adapter->msix_entries) {
+               int vector, msix_irq;
+
                 vector = 0;
                 msix_irq = adapter->msix_entries[vector].vector;
                 disable_irq(msix_irq);
@@ -5675,7 +5716,7 @@ static const struct net_device_ops e1000e_netdev_ops = {
         .ndo_open               = e1000_open,
         .ndo_stop               = e1000_close,
         .ndo_start_xmit         = e1000_xmit_frame,
-       .ndo_get_stats          = e1000_get_stats,
+       .ndo_get_stats64        = e1000e_get_stats64,
         .ndo_set_multicast_list = e1000_set_multi,
         .ndo_set_mac_address    = e1000_set_mac,
         .ndo_change_mtu         = e1000_change_mtu,
diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c

index 6bea051..6ae31fc 100644 (file)
--- a/drivers/net/e1000e/phy.c
+++ b/drivers/net/e1000e/phy.c
@@ -2409,9 +2409,7 @@ static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg)
  s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
  {
         s32 ret_val;
-       u32 page_select = 0;
         u32 page = offset >> IGP_PAGE_SHIFT;
-       u32 page_shift = 0;
  
         ret_val = hw->phy.ops.acquire(hw);
         if (ret_val)
@@ -2427,6 +2425,8 @@ s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
         hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
  
         if (offset > MAX_PHY_MULTI_PAGE_REG) {
+               u32 page_shift, page_select;
+
                 /*
                  * Page select is register 31 for phy address 1 and 22 for
                  * phy address 2 and 3. Page select is shifted only for
@@ -2468,9 +2468,7 @@ out:
  s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
  {
         s32 ret_val;
-       u32 page_select = 0;
         u32 page = offset >> IGP_PAGE_SHIFT;
-       u32 page_shift = 0;
  
         ret_val = hw->phy.ops.acquire(hw);
         if (ret_val)
@@ -2486,6 +2484,8 @@ s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
         hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
  
         if (offset > MAX_PHY_MULTI_PAGE_REG) {
+               u32 page_shift, page_select;
+
                 /*
                  * Page select is register 31 for phy address 1 and 22 for
                  * phy address 2 and 3. Page select is shifted only for
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h

index a937f49..ca3be4f 100644 (file)
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -32,8 +32,8 @@
  
  #define DRV_NAME               "enic"
  #define DRV_DESCRIPTION                "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION            "1.4.1.10"
-#define DRV_COPYRIGHT          "Copyright 2008-2010 Cisco Systems, Inc"
+#define DRV_VERSION            "2.1.1.2"
+#define DRV_COPYRIGHT          "Copyright 2008-2011 Cisco Systems, Inc"
  
  #define ENIC_BARS_MAX          6
  
@@ -49,7 +49,7 @@ struct enic_msix_entry {
         void *devid;
  };
  
-#define ENIC_SET_APPLIED               (1 << 0)
+#define ENIC_PORT_REQUEST_APPLIED      (1 << 0)
  #define ENIC_SET_REQUEST               (1 << 1)
  #define ENIC_SET_NAME                  (1 << 2)
  #define ENIC_SET_INSTANCE              (1 << 3)
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c

index a0af48c..89664c6 100644 (file)
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -1318,18 +1318,20 @@ static int enic_set_port_profile(struct enic *enic, u8 *mac)
                 vic_provinfo_free(vp);
                 if (err)
                         return err;
-
-               enic->pp.set |= ENIC_SET_APPLIED;
                 break;
  
         case PORT_REQUEST_DISASSOCIATE:
-               enic->pp.set &= ~ENIC_SET_APPLIED;
                 break;
  
         default:
                 return -EINVAL;
         }
  
+       /* Set flag to indicate that the port assoc/disassoc
+        * request has been sent out to fw
+        */
+       enic->pp.set |= ENIC_PORT_REQUEST_APPLIED;
+
         return 0;
  }
  
@@ -1411,7 +1413,7 @@ static int enic_get_vf_port(struct net_device *netdev, int vf,
         int err, error, done;
         u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
  
-       if (!(enic->pp.set & ENIC_SET_APPLIED))
+       if (!(enic->pp.set & ENIC_PORT_REQUEST_APPLIED))
                 return -ENODATA;
  
         err = enic_dev_init_done(enic, &done, &error);
diff --git a/drivers/net/igb/e1000_82575.c b/drivers/net/igb/e1000_82575.c

index 0a2368f..c1552b6 100644 (file)
--- a/drivers/net/igb/e1000_82575.c
+++ b/drivers/net/igb/e1000_82575.c
@@ -129,6 +129,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw)
                 break;
         case E1000_DEV_ID_82580_COPPER:
         case E1000_DEV_ID_82580_FIBER:
+       case E1000_DEV_ID_82580_QUAD_FIBER:
         case E1000_DEV_ID_82580_SERDES:
         case E1000_DEV_ID_82580_SGMII:
         case E1000_DEV_ID_82580_COPPER_DUAL:
diff --git a/drivers/net/igb/e1000_hw.h b/drivers/net/igb/e1000_hw.h

index e2638af..281324e 100644 (file)
--- a/drivers/net/igb/e1000_hw.h
+++ b/drivers/net/igb/e1000_hw.h
@@ -54,6 +54,7 @@ struct e1000_hw;
  #define E1000_DEV_ID_82580_SERDES             0x1510
  #define E1000_DEV_ID_82580_SGMII              0x1511
  #define E1000_DEV_ID_82580_COPPER_DUAL        0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER         0x1527
  #define E1000_DEV_ID_DH89XXCC_SGMII           0x0438
  #define E1000_DEV_ID_DH89XXCC_SERDES          0x043A
  #define E1000_DEV_ID_DH89XXCC_BACKPLANE       0x043C
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c

index 58c665b..200cc32 100644 (file)
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -68,6 +68,7 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c

index c7a6c44..9f6d670 100644 (file)
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -592,8 +592,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                         ppp_release(NULL, file);
                         err = 0;
                 } else
-                       printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n",
-                              atomic_long_read(&file->f_count));
+                       pr_warn("PPPIOCDETACH file->f_count=%ld\n",
+                               atomic_long_read(&file->f_count));
                 mutex_unlock(&ppp_mutex);
                 return err;
         }
@@ -630,7 +630,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  
         if (pf->kind != INTERFACE) {
                 /* can't happen */
-               printk(KERN_ERR "PPP: not interface or channel??\n");
+               pr_err("PPP: not interface or channel??\n");
                 return -EINVAL;
         }
  
@@ -704,7 +704,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                 }
                 vj = slhc_init(val2+1, val+1);
                 if (!vj) {
-                       printk(KERN_ERR "PPP: no memory (VJ compressor)\n");
+                       netdev_err(ppp->dev,
+                                  "PPP: no memory (VJ compressor)\n");
                         err = -ENOMEM;
                         break;
                 }
@@ -898,17 +899,17 @@ static int __init ppp_init(void)
  {
         int err;
  
-       printk(KERN_INFO "PPP generic driver version " PPP_VERSION "\n");
+       pr_info("PPP generic driver version " PPP_VERSION "\n");
  
         err = register_pernet_device(&ppp_net_ops);
         if (err) {
-               printk(KERN_ERR "failed to register PPP pernet device (%d)\n", err);
+               pr_err("failed to register PPP pernet device (%d)\n", err);
                 goto out;
         }
  
         err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops);
         if (err) {
-               printk(KERN_ERR "failed to register PPP device (%d)\n", err);
+               pr_err("failed to register PPP device (%d)\n", err);
                 goto out_net;
         }
  
@@ -1078,7 +1079,7 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb)
         new_skb = alloc_skb(new_skb_size, GFP_ATOMIC);
         if (!new_skb) {
                 if (net_ratelimit())
-                       printk(KERN_ERR "PPP: no memory (comp pkt)\n");
+                       netdev_err(ppp->dev, "PPP: no memory (comp pkt)\n");
                 return NULL;
         }
         if (ppp->dev->hard_header_len > PPP_HDRLEN)
@@ -1108,7 +1109,7 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb)
                  * the same number.
                  */
                 if (net_ratelimit())
-                       printk(KERN_ERR "ppp: compressor dropped pkt\n");
+                       netdev_err(ppp->dev, "ppp: compressor dropped pkt\n");
                 kfree_skb(skb);
                 kfree_skb(new_skb);
                 new_skb = NULL;
@@ -1138,7 +1139,9 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                 if (ppp->pass_filter &&
                     sk_run_filter(skb, ppp->pass_filter) == 0) {
                         if (ppp->debug & 1)
-                               printk(KERN_DEBUG "PPP: outbound frame not passed\n");
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "PPP: outbound frame "
+                                             "not passed\n");
                         kfree_skb(skb);
                         return;
                 }
@@ -1164,7 +1167,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                 new_skb = alloc_skb(skb->len + ppp->dev->hard_header_len - 2,
                                     GFP_ATOMIC);
                 if (!new_skb) {
-                       printk(KERN_ERR "PPP: no memory (VJ comp pkt)\n");
+                       netdev_err(ppp->dev, "PPP: no memory (VJ comp pkt)\n");
                         goto drop;
                 }
                 skb_reserve(new_skb, ppp->dev->hard_header_len - 2);
@@ -1202,7 +1205,9 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
             proto != PPP_LCP && proto != PPP_CCP) {
                 if (!(ppp->flags & SC_CCP_UP) && (ppp->flags & SC_MUST_COMP)) {
                         if (net_ratelimit())
-                               printk(KERN_ERR "ppp: compression required but down - pkt dropped.\n");
+                               netdev_err(ppp->dev,
+                                          "ppp: compression required but "
+                                          "down - pkt dropped.\n");
                         goto drop;
                 }
                 skb = pad_compress_skb(ppp, skb);
@@ -1505,7 +1510,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
   noskb:
         spin_unlock_bh(&pch->downl);
         if (ppp->debug & 1)
-               printk(KERN_ERR "PPP: no memory (fragment)\n");
+               netdev_err(ppp->dev, "PPP: no memory (fragment)\n");
         ++ppp->dev->stats.tx_errors;
         ++ppp->nxseq;
         return 1;       /* abandon the frame */
@@ -1686,7 +1691,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                         /* copy to a new sk_buff with more tailroom */
                         ns = dev_alloc_skb(skb->len + 128);
                         if (!ns) {
-                               printk(KERN_ERR"PPP: no memory (VJ decomp)\n");
+                               netdev_err(ppp->dev, "PPP: no memory "
+                                          "(VJ decomp)\n");
                                 goto err;
                         }
                         skb_reserve(ns, 2);
@@ -1699,7 +1705,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
  
                 len = slhc_uncompress(ppp->vj, skb->data + 2, skb->len - 2);
                 if (len <= 0) {
-                       printk(KERN_DEBUG "PPP: VJ decompression error\n");
+                       netdev_printk(KERN_DEBUG, ppp->dev,
+                                     "PPP: VJ decompression error\n");
                         goto err;
                 }
                 len += 2;
@@ -1721,7 +1728,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                         goto err;
  
                 if (slhc_remember(ppp->vj, skb->data + 2, skb->len - 2) <= 0) {
-                       printk(KERN_ERR "PPP: VJ uncompressed error\n");
+                       netdev_err(ppp->dev, "PPP: VJ uncompressed error\n");
                         goto err;
                 }
                 proto = PPP_IP;
@@ -1762,8 +1769,9 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                         if (ppp->pass_filter &&
                             sk_run_filter(skb, ppp->pass_filter) == 0) {
                                 if (ppp->debug & 1)
-                                       printk(KERN_DEBUG "PPP: inbound frame "
-                                              "not passed\n");
+                                       netdev_printk(KERN_DEBUG, ppp->dev,
+                                                     "PPP: inbound frame "
+                                                     "not passed\n");
                                 kfree_skb(skb);
                                 return;
                         }
@@ -1821,7 +1829,8 @@ ppp_decompress_frame(struct ppp *ppp, struct sk_buff *skb)
  
                 ns = dev_alloc_skb(obuff_size);
                 if (!ns) {
-                       printk(KERN_ERR "ppp_decompress_frame: no memory\n");
+                       netdev_err(ppp->dev, "ppp_decompress_frame: "
+                                  "no memory\n");
                         goto err;
                 }
                 /* the decompressor still expects the A/C bytes in the hdr */
@@ -1989,7 +1998,7 @@ ppp_mp_reconstruct(struct ppp *ppp)
         u32 seq = ppp->nextseq;
         u32 minseq = ppp->minseq;
         struct sk_buff_head *list = &ppp->mrq;
-       struct sk_buff *p, *next;
+       struct sk_buff *p, *tmp;
         struct sk_buff *head, *tail;
         struct sk_buff *skb = NULL;
         int lost = 0, len = 0;
@@ -1998,13 +2007,15 @@ ppp_mp_reconstruct(struct ppp *ppp)
                 return NULL;
         head = list->next;
         tail = NULL;
-       for (p = head; p != (struct sk_buff *) list; p = next) {
-               next = p->next;
+       skb_queue_walk_safe(list, p, tmp) {
+       again:
                 if (seq_before(PPP_MP_CB(p)->sequence, seq)) {
                         /* this can't happen, anyway ignore the skb */
-                       printk(KERN_ERR "ppp_mp_reconstruct bad seq %u < %u\n",
-                              PPP_MP_CB(p)->sequence, seq);
-                       head = next;
+                       netdev_err(ppp->dev, "ppp_mp_reconstruct bad "
+                                  "seq %u < %u\n",
+                                  PPP_MP_CB(p)->sequence, seq);
+                       __skb_unlink(p, list);
+                       kfree_skb(p);
                         continue;
                 }
                 if (PPP_MP_CB(p)->sequence != seq) {
@@ -2016,8 +2027,7 @@ ppp_mp_reconstruct(struct ppp *ppp)
                         lost = 1;
                         seq = seq_before(minseq, PPP_MP_CB(p)->sequence)?
                                 minseq + 1: PPP_MP_CB(p)->sequence;
-                       next = p;
-                       continue;
+                       goto again;
                 }
  
                 /*
@@ -2042,17 +2052,9 @@ ppp_mp_reconstruct(struct ppp *ppp)
                     (PPP_MP_CB(head)->BEbits & B)) {
                         if (len > ppp->mrru + 2) {
                                 ++ppp->dev->stats.rx_length_errors;
-                               printk(KERN_DEBUG "PPP: reconstructed packet"
-                                      " is too long (%d)\n", len);
-                       } else if (p == head) {
-                               /* fragment is complete packet - reuse skb */
-                               tail = p;
-                               skb = skb_get(p);
-                               break;
-                       } else if ((skb = dev_alloc_skb(len)) == NULL) {
-                               ++ppp->dev->stats.rx_missed_errors;
-                               printk(KERN_DEBUG "PPP: no memory for "
-                                      "reconstructed packet");
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "PPP: reconstructed packet"
+                                             " is too long (%d)\n", len);
                         } else {
                                 tail = p;
                                 break;
@@ -2065,9 +2067,17 @@ ppp_mp_reconstruct(struct ppp *ppp)
                  * and we haven't found a complete valid packet yet,
                  * we can discard up to and including this fragment.
                  */
-               if (PPP_MP_CB(p)->BEbits & E)
-                       head = next;
+               if (PPP_MP_CB(p)->BEbits & E) {
+                       struct sk_buff *tmp2;
  
+                       skb_queue_reverse_walk_from_safe(list, p, tmp2) {
+                               __skb_unlink(p, list);
+                               kfree_skb(p);
+                       }
+                       head = skb_peek(list);
+                       if (!head)
+                               break;
+               }
                 ++seq;
         }
  
@@ -2077,26 +2087,37 @@ ppp_mp_reconstruct(struct ppp *ppp)
                    signal a receive error. */
                 if (PPP_MP_CB(head)->sequence != ppp->nextseq) {
                         if (ppp->debug & 1)
-                               printk(KERN_DEBUG "  missed pkts %u..%u\n",
-                                      ppp->nextseq,
-                                      PPP_MP_CB(head)->sequence-1);
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "  missed pkts %u..%u\n",
+                                             ppp->nextseq,
+                                             PPP_MP_CB(head)->sequence-1);
                         ++ppp->dev->stats.rx_dropped;
                         ppp_receive_error(ppp);
                 }
  
-               if (head != tail)
-                       /* copy to a single skb */
-                       for (p = head; p != tail->next; p = p->next)
-                               skb_copy_bits(p, 0, skb_put(skb, p->len), p->len);
-               ppp->nextseq = PPP_MP_CB(tail)->sequence + 1;
-               head = tail->next;
-       }
+               skb = head;
+               if (head != tail) {
+                       struct sk_buff **fragpp = &skb_shinfo(skb)->frag_list;
+                       p = skb_queue_next(list, head);
+                       __skb_unlink(skb, list);
+                       skb_queue_walk_from_safe(list, p, tmp) {
+                               __skb_unlink(p, list);
+                               *fragpp = p;
+                               p->next = NULL;
+                               fragpp = &p->next;
+
+                               skb->len += p->len;
+                               skb->data_len += p->len;
+                               skb->truesize += p->len;
+
+                               if (p == tail)
+                                       break;
+                       }
+               } else {
+                       __skb_unlink(skb, list);
+               }
  
-       /* Discard all the skbuffs that we have copied the data out of
-          or that we can't use. */
-       while ((p = list->next) != head) {
-               __skb_unlink(p, list);
-               kfree_skb(p);
+               ppp->nextseq = PPP_MP_CB(tail)->sequence + 1;
         }
  
         return skb;
@@ -2617,8 +2638,8 @@ ppp_create_interface(struct net *net, int unit, int *retp)
         ret = register_netdev(dev);
         if (ret != 0) {
                 unit_put(&pn->units_idr, unit);
-               printk(KERN_ERR "PPP: couldn't register device %s (%d)\n",
-                      dev->name, ret);
+               netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n",
+                          dev->name, ret);
                 goto out2;
         }
  
@@ -2690,9 +2711,9 @@ static void ppp_destroy_interface(struct ppp *ppp)
  
         if (!ppp->file.dead || ppp->n_channels) {
                 /* "can't happen" */
-               printk(KERN_ERR "ppp: destroying ppp struct %p but dead=%d "
-                      "n_channels=%d !\n", ppp, ppp->file.dead,
-                      ppp->n_channels);
+               netdev_err(ppp->dev, "ppp: destroying ppp struct %p "
+                          "but dead=%d n_channels=%d !\n",
+                          ppp, ppp->file.dead, ppp->n_channels);
                 return;
         }
  
@@ -2834,8 +2855,7 @@ static void ppp_destroy_channel(struct channel *pch)
  
         if (!pch->file.dead) {
                 /* "can't happen" */
-               printk(KERN_ERR "ppp: destroying undead channel %p !\n",
-                      pch);
+               pr_err("ppp: destroying undead channel %p !\n", pch);
                 return;
         }
         skb_queue_purge(&pch->file.xq);
@@ -2847,7 +2867,7 @@ static void __exit ppp_cleanup(void)
  {
         /* should never happen */
         if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count))
-               printk(KERN_ERR "PPP: removing module but units remain!\n");
+               pr_err("PPP: removing module but units remain!\n");
         unregister_chrdev(PPP_MAJOR, "ppp");
         device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0));
         class_destroy(ppp_class);
@@ -2865,7 +2885,7 @@ static int __unit_alloc(struct idr *p, void *ptr, int n)
  
  again:
         if (!idr_pre_get(p, GFP_KERNEL)) {
-               printk(KERN_ERR "PPP: No free memory for idr\n");
+               pr_err("PPP: No free memory for idr\n");
                 return -ENOMEM;
         }
  
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c

index 09cac70..0d6fec6 100644 (file)
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -2923,6 +2923,7 @@ static u16 wol_calc_crc(int size, u8 *pattern, u8 *mask_pattern)
  static int velocity_set_wol(struct velocity_info *vptr)
  {
         struct mac_regs __iomem *regs = vptr->mac_regs;
+       enum speed_opt spd_dpx = vptr->options.spd_dpx;
         static u8 buf[256];
         int i;
  
@@ -2968,6 +2969,12 @@ static int velocity_set_wol(struct velocity_info *vptr)
  
         writew(0x0FFF, &regs->WOLSRClr);
  
+       if (spd_dpx == SPD_DPX_1000_FULL)
+               goto mac_done;
+
+       if (spd_dpx != SPD_DPX_AUTO)
+               goto advertise_done;
+
         if (vptr->mii_status & VELOCITY_AUTONEG_ENABLE) {
                 if (PHYID_GET_PHY_ID(vptr->phy_id) == PHYID_CICADA_CS8201)
                         MII_REG_BITS_ON(AUXCR_MDPPS, MII_NCONFIG, vptr->mac_regs);
@@ -2978,6 +2985,7 @@ static int velocity_set_wol(struct velocity_info *vptr)
         if (vptr->mii_status & VELOCITY_SPEED_1000)
                 MII_REG_BITS_ON(BMCR_ANRESTART, MII_BMCR, vptr->mac_regs);
  
+advertise_done:
         BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR);
  
         {
@@ -2987,6 +2995,7 @@ static int velocity_set_wol(struct velocity_info *vptr)
                 writeb(GCR, &regs->CHIPGCR);
         }
  
+mac_done:
         BYTE_REG_BITS_OFF(ISR_PWEI, &regs->ISR);
         /* Turn on SWPTAG just before entering power mode */
         BYTE_REG_BITS_ON(STICKHW_SWPTAG, &regs->STICKHW);
diff --git a/drivers/net/via-velocity.h b/drivers/net/via-velocity.h

index aa2e69b..d722753 100644 (file)
--- a/drivers/net/via-velocity.h
+++ b/drivers/net/via-velocity.h
@@ -361,7 +361,7 @@ enum  velocity_owner {
  #define MAC_REG_CHIPGSR     0x9C
  #define MAC_REG_TESTCFG     0x9D
  #define MAC_REG_DEBUG       0x9E
-#define MAC_REG_CHIPGCR     0x9F
+#define MAC_REG_CHIPGCR     0x9F       /* Chip Operation and Diagnostic Control */
  #define MAC_REG_WOLCR0_SET  0xA0
  #define MAC_REG_WOLCR1_SET  0xA1
  #define MAC_REG_PWCFG_SET   0xA2
@@ -848,10 +848,10 @@ enum  velocity_owner {
   *     Bits in CHIPGCR register
   */
  
-#define CHIPGCR_FCGMII      0x80       /* enable GMII mode */
-#define CHIPGCR_FCFDX       0x40
+#define CHIPGCR_FCGMII      0x80       /* force GMII (else MII only) */
+#define CHIPGCR_FCFDX       0x40       /* force full duplex */
  #define CHIPGCR_FCRESV      0x20
-#define CHIPGCR_FCMODE      0x10
+#define CHIPGCR_FCMODE      0x10       /* enable MAC forced mode */
  #define CHIPGCR_LPSOPT      0x08
  #define CHIPGCR_TM1US       0x04
  #define CHIPGCR_TM0US       0x02
diff --git a/drivers/net/vxge/vxge-config.c b/drivers/net/vxge/vxge-config.c

index 01c05f5..77097e3 100644 (file)
--- a/drivers/net/vxge/vxge-config.c
+++ b/drivers/net/vxge/vxge-config.c
@@ -387,8 +387,8 @@ vxge_hw_vpath_eprom_img_ver_get(struct __vxge_hw_device *hldev,
                 data1 = steer_ctrl = 0;
  
                 status = vxge_hw_vpath_fw_api(vpath,
-                       VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
                         VXGE_HW_FW_API_GET_EPROM_REV,
+                       VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
                         0, &data0, &data1, &steer_ctrl);
                 if (status != VXGE_HW_OK)
                         break;
@@ -2868,6 +2868,8 @@ __vxge_hw_ring_create(struct __vxge_hw_vpath_handle *vp,
         ring->rxd_init = attr->rxd_init;
         ring->rxd_term = attr->rxd_term;
         ring->buffer_mode = config->buffer_mode;
+       ring->tim_rti_cfg1_saved = vp->vpath->tim_rti_cfg1_saved;
+       ring->tim_rti_cfg3_saved = vp->vpath->tim_rti_cfg3_saved;
         ring->rxds_limit = config->rxds_limit;
  
         ring->rxd_size = vxge_hw_ring_rxd_size_get(config->buffer_mode);
@@ -3511,6 +3513,8 @@ __vxge_hw_fifo_create(struct __vxge_hw_vpath_handle *vp,
  
         /* apply "interrupts per txdl" attribute */
         fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_UTILZ;
+       fifo->tim_tti_cfg1_saved = vpath->tim_tti_cfg1_saved;
+       fifo->tim_tti_cfg3_saved = vpath->tim_tti_cfg3_saved;
  
         if (fifo->config->intr)
                 fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_PER_LIST;
@@ -4377,6 +4381,8 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                 }
  
                 writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+               vpath->tim_tti_cfg1_saved = val64;
+
                 val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_TX]);
  
                 if (config->tti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) {
@@ -4433,6 +4439,7 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                 }
  
                 writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]);
+               vpath->tim_tti_cfg3_saved = val64;
         }
  
         if (config->ring.enable == VXGE_HW_RING_ENABLE) {
@@ -4481,6 +4488,8 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                 }
  
                 writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]);
+               vpath->tim_rti_cfg1_saved = val64;
+
                 val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_RX]);
  
                 if (config->rti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) {
@@ -4537,6 +4546,7 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                 }
  
                 writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]);
+               vpath->tim_rti_cfg3_saved = val64;
         }
  
         val64 = 0;
@@ -4555,26 +4565,6 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
         return status;
  }
  
-void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id)
-{
-       struct __vxge_hw_virtualpath *vpath;
-       struct vxge_hw_vpath_reg __iomem *vp_reg;
-       struct vxge_hw_vp_config *config;
-       u64 val64;
-
-       vpath = &hldev->virtual_paths[vp_id];
-       vp_reg = vpath->vp_reg;
-       config = vpath->vp_config;
-
-       if (config->fifo.enable == VXGE_HW_FIFO_ENABLE &&
-           config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) {
-               config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE;
-               val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
-               val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
-               writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
-       }
-}
-
  /*
   * __vxge_hw_vpath_initialize
   * This routine is the final phase of init which initializes the
diff --git a/drivers/net/vxge/vxge-config.h b/drivers/net/vxge/vxge-config.h

index e249e28..3c53aa7 100644 (file)
--- a/drivers/net/vxge/vxge-config.h
+++ b/drivers/net/vxge/vxge-config.h
@@ -682,6 +682,10 @@ struct __vxge_hw_virtualpath {
         u32                             vsport_number;
         u32                             max_kdfc_db;
         u32                             max_nofl_db;
+       u64                             tim_tti_cfg1_saved;
+       u64                             tim_tti_cfg3_saved;
+       u64                             tim_rti_cfg1_saved;
+       u64                             tim_rti_cfg3_saved;
  
         struct __vxge_hw_ring *____cacheline_aligned ringh;
         struct __vxge_hw_fifo *____cacheline_aligned fifoh;
@@ -921,6 +925,9 @@ struct __vxge_hw_ring {
         u32                                     doorbell_cnt;
         u32                                     total_db_cnt;
         u64                                     rxds_limit;
+       u32                                     rtimer;
+       u64                                     tim_rti_cfg1_saved;
+       u64                                     tim_rti_cfg3_saved;
  
         enum vxge_hw_status (*callback)(
                         struct __vxge_hw_ring *ringh,
@@ -1000,6 +1007,9 @@ struct __vxge_hw_fifo {
         u32                                     per_txdl_space;
         u32                                     vp_id;
         u32                                     tx_intr_num;
+       u32                                     rtimer;
+       u64                                     tim_tti_cfg1_saved;
+       u64                                     tim_tti_cfg3_saved;
  
         enum vxge_hw_status (*callback)(
                         struct __vxge_hw_fifo *fifo_handle,
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c

index c81a651..e40f619 100644 (file)
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -371,9 +371,6 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr,
         struct vxge_hw_ring_rxd_info ext_info;
         vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d",
                 ring->ndev->name, __func__, __LINE__);
-       ring->pkts_processed = 0;
-
-       vxge_hw_ring_replenish(ringh);
  
         do {
                 prefetch((char *)dtr + L1_CACHE_BYTES);
@@ -1588,6 +1585,36 @@ static int vxge_reset_vpath(struct vxgedev *vdev, int vp_id)
         return ret;
  }
  
+/* Configure CI */
+static void vxge_config_ci_for_tti_rti(struct vxgedev *vdev)
+{
+       int i = 0;
+
+       /* Enable CI for RTI */
+       if (vdev->config.intr_type == MSI_X) {
+               for (i = 0; i < vdev->no_of_vpath; i++) {
+                       struct __vxge_hw_ring *hw_ring;
+
+                       hw_ring = vdev->vpaths[i].ring.handle;
+                       vxge_hw_vpath_dynamic_rti_ci_set(hw_ring);
+               }
+       }
+
+       /* Enable CI for TTI */
+       for (i = 0; i < vdev->no_of_vpath; i++) {
+               struct __vxge_hw_fifo *hw_fifo = vdev->vpaths[i].fifo.handle;
+               vxge_hw_vpath_tti_ci_set(hw_fifo);
+               /*
+                * For Inta (with or without napi), Set CI ON for only one
+                * vpath. (Have only one free running timer).
+                */
+               if ((vdev->config.intr_type == INTA) && (i == 0))
+                       break;
+       }
+
+       return;
+}
+
  static int do_vxge_reset(struct vxgedev *vdev, int event)
  {
         enum vxge_hw_status status;
@@ -1753,6 +1780,9 @@ static int do_vxge_reset(struct vxgedev *vdev, int event)
                 netif_tx_wake_all_queues(vdev->ndev);
         }
  
+       /* configure CI */
+       vxge_config_ci_for_tti_rti(vdev);
+
  out:
         vxge_debug_entryexit(VXGE_TRACE,
                 "%s:%d  Exiting...", __func__, __LINE__);
@@ -1793,22 +1823,29 @@ static void vxge_reset(struct work_struct *work)
   */
  static int vxge_poll_msix(struct napi_struct *napi, int budget)
  {
-       struct vxge_ring *ring =
-               container_of(napi, struct vxge_ring, napi);
+       struct vxge_ring *ring = container_of(napi, struct vxge_ring, napi);
+       int pkts_processed;
         int budget_org = budget;
-       ring->budget = budget;
  
+       ring->budget = budget;
+       ring->pkts_processed = 0;
         vxge_hw_vpath_poll_rx(ring->handle);
+       pkts_processed = ring->pkts_processed;
  
         if (ring->pkts_processed < budget_org) {
                 napi_complete(napi);
+
                 /* Re enable the Rx interrupts for the vpath */
                 vxge_hw_channel_msix_unmask(
                                 (struct __vxge_hw_channel *)ring->handle,
                                 ring->rx_vector_no);
+               mmiowb();
         }
  
-       return ring->pkts_processed;
+       /* Return the local copy rather than ring->pkts_processed, in case
+        * the interrupt fires right after the MSI-X vector is unmasked
+        * above and preempts this NAPI thread */
+       return pkts_processed;
  }
  
  static int vxge_poll_inta(struct napi_struct *napi, int budget)
@@ -1824,6 +1861,7 @@ static int vxge_poll_inta(struct napi_struct *napi, int budget)
         for (i = 0; i < vdev->no_of_vpath; i++) {
                 ring = &vdev->vpaths[i].ring;
                 ring->budget = budget;
+               ring->pkts_processed = 0;
                 vxge_hw_vpath_poll_rx(ring->handle);
                 pkts_processed += ring->pkts_processed;
                 budget -= ring->pkts_processed;
@@ -2054,6 +2092,7 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
                                         netdev_get_tx_queue(vdev->ndev, 0);
                         vpath->fifo.indicate_max_pkts =
                                 vdev->config.fifo_indicate_max_pkts;
+                       vpath->fifo.tx_vector_no = 0;
                         vpath->ring.rx_vector_no = 0;
                         vpath->ring.rx_csum = vdev->rx_csum;
                         vpath->ring.rx_hwts = vdev->rx_hwts;
@@ -2079,6 +2118,61 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
         return VXGE_HW_OK;
  }
  
+/**
+ *  adaptive_coalesce_tx_interrupts - Changes the interrupt coalescing
+ *  if the interrupts are not within a range
+ *  @fifo: pointer to transmit fifo structure
+ *  Description: The function changes the boundary timer and restriction
+ *  timer values depending on the traffic
+ *  Return Value: None
+ */
+static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo)
+{
+       fifo->interrupt_count++;
+       if (jiffies > fifo->jiffies + HZ / 100) {
+               struct __vxge_hw_fifo *hw_fifo = fifo->handle;
+
+               fifo->jiffies = jiffies;
+               if (fifo->interrupt_count > VXGE_T1A_MAX_TX_INTERRUPT_COUNT &&
+                   hw_fifo->rtimer != VXGE_TTI_RTIMER_ADAPT_VAL) {
+                       hw_fifo->rtimer = VXGE_TTI_RTIMER_ADAPT_VAL;
+                       vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo);
+               } else if (hw_fifo->rtimer != 0) {
+                       hw_fifo->rtimer = 0;
+                       vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo);
+               }
+               fifo->interrupt_count = 0;
+       }
+}
+
+/**
+ *  adaptive_coalesce_rx_interrupts - Changes the interrupt coalescing
+ *  if the interrupts are not within a range
+ *  @ring: pointer to receive ring structure
+ *  Description: The function increases or decreases the packet counts within
+ *  the ranges of traffic utilization, if the interrupts due to this ring are
+ *  not within a fixed range.
+ *  Return Value: Nothing
+ */
+static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring)
+{
+       ring->interrupt_count++;
+       if (jiffies > ring->jiffies + HZ / 100) {
+               struct __vxge_hw_ring *hw_ring = ring->handle;
+
+               ring->jiffies = jiffies;
+               if (ring->interrupt_count > VXGE_T1A_MAX_INTERRUPT_COUNT &&
+                   hw_ring->rtimer != VXGE_RTI_RTIMER_ADAPT_VAL) {
+                       hw_ring->rtimer = VXGE_RTI_RTIMER_ADAPT_VAL;
+                       vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring);
+               } else if (hw_ring->rtimer != 0) {
+                       hw_ring->rtimer = 0;
+                       vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring);
+               }
+               ring->interrupt_count = 0;
+       }
+}
+
  /*
   *  vxge_isr_napi
   *  @irq: the irq of the device.
@@ -2139,24 +2233,39 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
  
  #ifdef CONFIG_PCI_MSI
  
-static irqreturn_t
-vxge_tx_msix_handle(int irq, void *dev_id)
+static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
  {
         struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
  
+       adaptive_coalesce_tx_interrupts(fifo);
+
+       vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)fifo->handle,
+                                 fifo->tx_vector_no);
+
+       vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)fifo->handle,
+                                  fifo->tx_vector_no);
+
         VXGE_COMPLETE_VPATH_TX(fifo);
  
+       vxge_hw_channel_msix_unmask((struct __vxge_hw_channel *)fifo->handle,
+                                   fifo->tx_vector_no);
+
+       mmiowb();
+
         return IRQ_HANDLED;
  }
  
-static irqreturn_t
-vxge_rx_msix_napi_handle(int irq, void *dev_id)
+static irqreturn_t vxge_rx_msix_napi_handle(int irq, void *dev_id)
  {
         struct vxge_ring *ring = (struct vxge_ring *)dev_id;
  
-       /* MSIX_IDX for Rx is 1 */
+       adaptive_coalesce_rx_interrupts(ring);
+
         vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)ring->handle,
-                                       ring->rx_vector_no);
+                                 ring->rx_vector_no);
+
+       vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)ring->handle,
+                                  ring->rx_vector_no);
  
         napi_schedule(&ring->napi);
         return IRQ_HANDLED;
@@ -2173,14 +2282,20 @@ vxge_alarm_msix_handle(int irq, void *dev_id)
                 VXGE_HW_VPATH_MSIX_ACTIVE) + VXGE_ALARM_MSIX_ID;
  
         for (i = 0; i < vdev->no_of_vpath; i++) {
+               /* Reduce the chance of losing alarm interrupts by masking
+                * the vector. A pending bit will be set if an alarm is
+                * generated and on unmask the interrupt will be fired.
+                */
                 vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id);
+               vxge_hw_vpath_msix_clear(vdev->vpaths[i].handle, msix_id);
+               mmiowb();
  
                 status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle,
                         vdev->exec_mode);
                 if (status == VXGE_HW_OK) {
-
                         vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle,
-                                       msix_id);
+                                                 msix_id);
+                       mmiowb();
                         continue;
                 }
                 vxge_debug_intr(VXGE_ERR,
@@ -2299,6 +2414,9 @@ static int vxge_enable_msix(struct vxgedev *vdev)
                         vpath->ring.rx_vector_no = (vpath->device_id *
                                                 VXGE_HW_VPATH_MSIX_ACTIVE) + 1;
  
+                       vpath->fifo.tx_vector_no = (vpath->device_id *
+                                               VXGE_HW_VPATH_MSIX_ACTIVE);
+
                         vxge_hw_vpath_msix_set(vpath->handle, tim_msix_id,
                                                VXGE_ALARM_MSIX_ID);
                 }
@@ -2474,8 +2592,9 @@ INTA_MODE:
                         "%s:vxge:INTA", vdev->ndev->name);
                 vxge_hw_device_set_intr_type(vdev->devh,
                         VXGE_HW_INTR_MODE_IRQLINE);
-               vxge_hw_vpath_tti_ci_set(vdev->devh,
-                       vdev->vpaths[0].device_id);
+
+               vxge_hw_vpath_tti_ci_set(vdev->vpaths[0].fifo.handle);
+
                 ret = request_irq((int) vdev->pdev->irq,
                         vxge_isr_napi,
                         IRQF_SHARED, vdev->desc[0], vdev);
@@ -2745,6 +2864,10 @@ static int vxge_open(struct net_device *dev)
         }
  
         netif_tx_start_all_queues(vdev->ndev);
+
+       /* configure CI */
+       vxge_config_ci_for_tti_rti(vdev);
+
         goto out0;
  
  out2:
@@ -3348,7 +3471,7 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
                 vxge_debug_init(VXGE_ERR,
                         "%s: vpath memory allocation failed",
                         vdev->ndev->name);
-               ret = -ENODEV;
+               ret = -ENOMEM;
                 goto _out1;
         }
  
@@ -3369,11 +3492,11 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
         if (vdev->config.gro_enable)
                 ndev->features |= NETIF_F_GRO;
  
-       if (register_netdev(ndev)) {
+       ret = register_netdev(ndev);
+       if (ret) {
                 vxge_debug_init(vxge_hw_device_trace_level_get(hldev),
                         "%s: %s : device registration failed!",
                         ndev->name, __func__);
-               ret = -ENODEV;
                 goto _out2;
         }
  
@@ -3444,6 +3567,11 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev)
         /* in 2.6 will call stop() if device is up */
         unregister_netdev(dev);
  
+       kfree(vdev->vpaths);
+
+       /* we are safe to free it now */
+       free_netdev(dev);
+
         vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
                         buf);
         vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d  Exiting...", buf,
@@ -3799,7 +3927,7 @@ static void __devinit vxge_device_config_init(
                 break;
  
         case MSI_X:
-               device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX;
+               device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX_ONE_SHOT;
                 break;
         }
  
@@ -4335,10 +4463,10 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
                 goto _exit1;
         }
  
-       if (pci_request_region(pdev, 0, VXGE_DRIVER_NAME)) {
+       ret = pci_request_region(pdev, 0, VXGE_DRIVER_NAME);
+       if (ret) {
                 vxge_debug_init(VXGE_ERR,
                         "%s : request regions failed", __func__);
-               ret = -ENODEV;
                 goto _exit1;
         }
  
@@ -4446,7 +4574,7 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
                         if (!img[i].is_valid)
                                 break;
                         vxge_debug_init(VXGE_TRACE, "%s: EPROM %d, version "
-                                       "%d.%d.%d.%d\n", VXGE_DRIVER_NAME, i,
+                                       "%d.%d.%d.%d", VXGE_DRIVER_NAME, i,
                                         VXGE_EPROM_IMG_MAJOR(img[i].version),
                                         VXGE_EPROM_IMG_MINOR(img[i].version),
                                         VXGE_EPROM_IMG_FIX(img[i].version),
@@ -4643,8 +4771,9 @@ _exit6:
  _exit5:
         vxge_device_unregister(hldev);
  _exit4:
-       pci_disable_sriov(pdev);
+       pci_set_drvdata(pdev, NULL);
         vxge_hw_device_terminate(hldev);
+       pci_disable_sriov(pdev);
  _exit3:
         iounmap(attr.bar0);
  _exit2:
@@ -4655,7 +4784,7 @@ _exit0:
         kfree(ll_config);
         kfree(device_config);
         driver_config->config_dev_cnt--;
-       pci_set_drvdata(pdev, NULL);
+       driver_config->total_dev_cnt--;
         return ret;
  }
  
@@ -4668,45 +4797,34 @@ _exit0:
  static void __devexit vxge_remove(struct pci_dev *pdev)
  {
         struct __vxge_hw_device *hldev;
-       struct vxgedev *vdev = NULL;
-       struct net_device *dev;
-       int i = 0;
+       struct vxgedev *vdev;
+       int i;
  
         hldev = pci_get_drvdata(pdev);
-
         if (hldev == NULL)
                 return;
  
-       dev = hldev->ndev;
-       vdev = netdev_priv(dev);
+       vdev = netdev_priv(hldev->ndev);
  
         vxge_debug_entryexit(vdev->level_trace, "%s:%d", __func__, __LINE__);
-
         vxge_debug_init(vdev->level_trace, "%s : removing PCI device...",
                         __func__);
-       vxge_device_unregister(hldev);
  
-       for (i = 0; i < vdev->no_of_vpath; i++) {
+       for (i = 0; i < vdev->no_of_vpath; i++)
                 vxge_free_mac_add_list(&vdev->vpaths[i]);
-               vdev->vpaths[i].mcast_addr_cnt = 0;
-               vdev->vpaths[i].mac_addr_cnt = 0;
-       }
-
-       kfree(vdev->vpaths);
  
+       vxge_device_unregister(hldev);
+       pci_set_drvdata(pdev, NULL);
+       /* Do not call pci_disable_sriov here, as it will break child devices */
+       vxge_hw_device_terminate(hldev);
         iounmap(vdev->bar0);
-
-       /* we are safe to free it now */
-       free_netdev(dev);
+       pci_release_region(pdev, 0);
+       pci_disable_device(pdev);
+       driver_config->config_dev_cnt--;
+       driver_config->total_dev_cnt--;
  
         vxge_debug_init(vdev->level_trace, "%s:%d Device unregistered",
                         __func__, __LINE__);
-
-       vxge_hw_device_terminate(hldev);
-
-       pci_disable_device(pdev);
-       pci_release_region(pdev, 0);
-       pci_set_drvdata(pdev, NULL);
         vxge_debug_entryexit(vdev->level_trace, "%s:%d  Exiting...", __func__,
                              __LINE__);
  }
diff --git a/drivers/net/vxge/vxge-main.h b/drivers/net/vxge/vxge-main.h

index 5746fed..40474f0 100644 (file)
--- a/drivers/net/vxge/vxge-main.h
+++ b/drivers/net/vxge/vxge-main.h
@@ -59,11 +59,13 @@
  #define VXGE_TTI_LTIMER_VAL    1000
  #define VXGE_T1A_TTI_LTIMER_VAL        80
  #define VXGE_TTI_RTIMER_VAL    0
+#define VXGE_TTI_RTIMER_ADAPT_VAL      10
  #define VXGE_T1A_TTI_RTIMER_VAL        400
  #define VXGE_RTI_BTIMER_VAL    250
  #define VXGE_RTI_LTIMER_VAL    100
  #define VXGE_RTI_RTIMER_VAL    0
-#define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH
+#define VXGE_RTI_RTIMER_ADAPT_VAL      15
+#define VXGE_FIFO_INDICATE_MAX_PKTS    VXGE_DEF_FIFO_LENGTH
  #define VXGE_ISR_POLLING_CNT   8
  #define VXGE_MAX_CONFIG_DEV    0xFF
  #define VXGE_EXEC_MODE_DISABLE 0
@@ -107,6 +109,14 @@
  #define RTI_T1A_RX_UFC_C       50
  #define RTI_T1A_RX_UFC_D       60
  
+/*
+ * The interrupt rate is maintained at 3k per second with the moderation
+ * parameters for most traffic but not all. This is the maximum interrupt
+ * count allowed per function with INTA or per vector in the case of
+ * MSI-X in a 10 millisecond time period. Enabled only for Titan 1A.
+ */
+#define VXGE_T1A_MAX_INTERRUPT_COUNT   100
+#define VXGE_T1A_MAX_TX_INTERRUPT_COUNT        200
  
  /* Milli secs timer period */
  #define VXGE_TIMER_DELAY               10000
@@ -247,6 +257,11 @@ struct vxge_fifo {
         int tx_steering_type;
         int indicate_max_pkts;
  
+       /* Adaptive interrupt moderation parameters used in T1A */
+       unsigned long interrupt_count;
+       unsigned long jiffies;
+
+       u32 tx_vector_no;
         /* Tx stats */
         struct vxge_fifo_stats stats;
  } ____cacheline_aligned;
@@ -271,6 +286,10 @@ struct vxge_ring {
          */
         int driver_id;
  
+       /* Adaptive interrupt moderation parameters used in T1A */
+       unsigned long interrupt_count;
+       unsigned long jiffies;
+
         /* copy of the flag indicating whether rx_csum is to be used */
         u32 rx_csum:1,
             rx_hwts:1;
@@ -286,7 +305,7 @@ struct vxge_ring {
  
         int vlan_tag_strip;
         struct vlan_group *vlgrp;
-       int rx_vector_no;
+       u32 rx_vector_no;
         enum vxge_hw_status last_status;
  
         /* Rx stats */
diff --git a/drivers/net/vxge/vxge-traffic.c b/drivers/net/vxge/vxge-traffic.c

index 4c10d6c..8674f33 100644 (file)
--- a/drivers/net/vxge/vxge-traffic.c
+++ b/drivers/net/vxge/vxge-traffic.c
@@ -218,6 +218,68 @@ exit:
         return status;
  }
  
+void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo)
+{
+       struct vxge_hw_vpath_reg __iomem *vp_reg;
+       struct vxge_hw_vp_config *config;
+       u64 val64;
+
+       if (fifo->config->enable != VXGE_HW_FIFO_ENABLE)
+               return;
+
+       vp_reg = fifo->vp_reg;
+       config = container_of(fifo->config, struct vxge_hw_vp_config, fifo);
+
+       if (config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) {
+               config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE;
+               val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+               val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
+               fifo->tim_tti_cfg1_saved = val64;
+               writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+       }
+}
+
+void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring)
+{
+       u64 val64 = ring->tim_rti_cfg1_saved;
+
+       val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
+       ring->tim_rti_cfg1_saved = val64;
+       writeq(val64, &ring->vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]);
+}
+
+void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo)
+{
+       u64 val64 = fifo->tim_tti_cfg3_saved;
+       u64 timer = (fifo->rtimer * 1000) / 272;
+
+       val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff);
+       if (timer)
+               val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) |
+                       VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(5);
+
+       writeq(val64, &fifo->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]);
+       /* tti_cfg3_saved is not updated again because it is
+        * initialized at one place only - init time.
+        */
+}
+
+void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring)
+{
+       u64 val64 = ring->tim_rti_cfg3_saved;
+       u64 timer = (ring->rtimer * 1000) / 272;
+
+       val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff);
+       if (timer)
+               val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) |
+                       VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(4);
+
+       writeq(val64, &ring->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]);
+       /* rti_cfg3_saved is not updated again because it is
+        * initialized at one place only - init time.
+        */
+}
+
  /**
   * vxge_hw_channel_msix_mask - Mask MSIX Vector.
   * @channeh: Channel for rx or tx handle
@@ -253,6 +315,23 @@ vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channel, int msix_id)
                 &channel->common_reg->clear_msix_mask_vect[msix_id%4]);
  }
  
+/**
+ * vxge_hw_channel_msix_clear - Clear the one-shot MSIX Vector.
+ * @channel: Channel for rx or tx handle
+ * @msix_id:  MSI ID
+ *
+ * The function unmasks the msix interrupt for the given msix_id
+ * if configured in MSIX oneshot mode
+ *
+ * Returns: nothing (the function is void)
+ */
+void vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channel, int msix_id)
+{
+       __vxge_hw_pio_mem_write32_upper(
+               (u32) vxge_bVALn(vxge_mBIT(msix_id >> 2), 0, 32),
+               &channel->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
+}
+
  /**
   * vxge_hw_device_set_intr_type - Updates the configuration
   *             with new interrupt type.
@@ -2190,20 +2269,15 @@ vxge_hw_vpath_msix_set(struct __vxge_hw_vpath_handle *vp, int *tim_msix_id,
  
         if (vpath->hldev->config.intr_mode ==
                                         VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) {
+               __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
+                               VXGE_HW_ONE_SHOT_VECT0_EN_ONE_SHOT_VECT0_EN,
+                               0, 32), &vp_reg->one_shot_vect0_en);
                 __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
                                 VXGE_HW_ONE_SHOT_VECT1_EN_ONE_SHOT_VECT1_EN,
                                 0, 32), &vp_reg->one_shot_vect1_en);
-       }
-
-       if (vpath->hldev->config.intr_mode ==
-               VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) {
                 __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
                                 VXGE_HW_ONE_SHOT_VECT2_EN_ONE_SHOT_VECT2_EN,
                                 0, 32), &vp_reg->one_shot_vect2_en);
-
-               __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
-                               VXGE_HW_ONE_SHOT_VECT3_EN_ONE_SHOT_VECT3_EN,
-                               0, 32), &vp_reg->one_shot_vect3_en);
         }
  }
  
@@ -2228,6 +2302,32 @@ vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vp, int msix_id)
                 &hldev->common_reg->set_msix_mask_vect[msix_id % 4]);
  }
  
+/**
+ * vxge_hw_vpath_msix_clear - Clear MSIX Vector.
+ * @vp: Virtual Path handle.
+ * @msix_id:  MSI ID
+ *
+ * The function clears the msix interrupt for the given msix_id
+ *
+ * In one-shot MSIX mode the write goes to clr_msix_one_shot_vec;
+ * otherwise it goes to clear_msix_mask_vect.
+ *
+ * Returns: nothing (the function is void).
+ */
+void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id)
+{
+       struct __vxge_hw_device *hldev = vp->vpath->hldev;
+
+       if ((hldev->config.intr_mode == VXGE_HW_INTR_MODE_MSIX_ONE_SHOT))
+               __vxge_hw_pio_mem_write32_upper(
+                       (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
+                       &hldev->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
+       else
+               __vxge_hw_pio_mem_write32_upper(
+                       (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
+                       &hldev->common_reg->clear_msix_mask_vect[msix_id % 4]);
+}
+
  /**
   * vxge_hw_vpath_msix_unmask - Unmask the MSIX Vector.
   * @vp: Virtual Path handle.
diff --git a/drivers/net/vxge/vxge-traffic.h b/drivers/net/vxge/vxge-traffic.h

index d48486d..9d9dfda 100644 (file)
--- a/drivers/net/vxge/vxge-traffic.h
+++ b/drivers/net/vxge/vxge-traffic.h
@@ -2142,6 +2142,10 @@ void vxge_hw_device_clear_tx_rx(
   *  Virtual Paths
   */
  
+void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring);
+
+void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo);
+
  u32 vxge_hw_vpath_id(
         struct __vxge_hw_vpath_handle *vpath_handle);
  
@@ -2245,6 +2249,8 @@ void
  vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vpath_handle,
                         int msix_id);
  
+void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id);
+
  void vxge_hw_device_flush_io(struct __vxge_hw_device *devh);
  
  void
@@ -2269,6 +2275,9 @@ vxge_hw_channel_msix_mask(struct __vxge_hw_channel *channelh, int msix_id);
  void
  vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channelh, int msix_id);
  
+void
+vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channelh, int msix_id);
+
  void
  vxge_hw_channel_dtr_try_complete(struct __vxge_hw_channel *channel,
                                  void **dtrh);
@@ -2282,7 +2291,8 @@ vxge_hw_channel_dtr_free(struct __vxge_hw_channel *channel, void *dtrh);
  int
  vxge_hw_channel_dtr_count(struct __vxge_hw_channel *channel);
  
-void
-vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id);
+void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo);
+
+void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring);
  
  #endif
diff --git a/drivers/net/vxge/vxge-version.h b/drivers/net/vxge/vxge-version.h

index ad2f99b..581e215 100644 (file)
--- a/drivers/net/vxge/vxge-version.h
+++ b/drivers/net/vxge/vxge-version.h
@@ -16,8 +16,8 @@
  
  #define VXGE_VERSION_MAJOR     "2"
  #define VXGE_VERSION_MINOR     "5"
-#define VXGE_VERSION_FIX       "1"
-#define VXGE_VERSION_BUILD     "22082"
+#define VXGE_VERSION_FIX       "2"
+#define VXGE_VERSION_BUILD     "22259"
  #define VXGE_VERSION_FOR       "k"
  
  #define VXGE_FW_VER(maj, min, bld) (((maj) << 16) + ((min) << 8) + (bld))
diff --git a/include/linux/audit.h b/include/linux/audit.h

index 359df04..9d339eb 100644 (file)
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -103,6 +103,8 @@
  #define AUDIT_BPRM_FCAPS       1321    /* Information about fcaps increasing perms */
  #define AUDIT_CAPSET           1322    /* Record showing argument to sys_capset */
  #define AUDIT_MMAP             1323    /* Record showing descriptor and flags in mmap */
+#define AUDIT_NETFILTER_PKT    1324    /* Packets traversing netfilter chains */
+#define AUDIT_NETFILTER_CFG    1325    /* Netfilter chain modifications */
  
  #define AUDIT_AVC              1400    /* SE Linux avc denial or grant */
  #define AUDIT_SELINUX_ERR      1401    /* Internal SE Linux Errors */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h

index 010e2d8..d638e85 100644 (file)
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -279,8 +279,6 @@ enum dccp_state {
         DCCP_MAX_STATES
  };
  
-#define DCCP_STATE_MASK 0x1f
-
  enum {
         DCCPF_OPEN            = TCPF_ESTABLISHED,
         DCCPF_REQUESTING      = TCPF_SYN_SENT,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h

index 6485d2a..f4a2e6b 100644 (file)
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -135,6 +135,7 @@ enum {
         IFLA_VF_PORTS,
         IFLA_PORT_SELF,
         IFLA_AF_SPEC,
+       IFLA_GROUP,             /* Group the device belongs to */
         __IFLA_MAX
  };
  
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h

index 5f43a3b..4deb383 100644 (file)
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -89,6 +89,14 @@
  #define IP_VS_CONN_F_TEMPLATE  0x1000          /* template, not connection */
  #define IP_VS_CONN_F_ONE_PACKET        0x2000          /* forward only one packet */
  
+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
+                                 IP_VS_CONN_F_NOOUTPUT | \
+                                 IP_VS_CONN_F_INACTIVE | \
+                                 IP_VS_CONN_F_SEQ_MASK | \
+                                 IP_VS_CONN_F_NO_CPORT | \
+                                 IP_VS_CONN_F_TEMPLATE \
+                                )
+
  /* Flags that are not sent to backup server start from bit 16 */
  #define IP_VS_CONN_F_NFCT      (1 << 16)       /* use netfilter conntrack */
  
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

index d971346..371fa88 100644 (file)
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -75,6 +75,9 @@ struct wireless_dev;
  #define NET_RX_SUCCESS         0       /* keep 'em coming, baby */
  #define NET_RX_DROP            1       /* packet dropped */
  
+/* Initial net device group. All devices belong to group 0 by default. */
+#define INIT_NETDEV_GROUP      0
+
  /*
   * Transmit return codes: transmit return codes originate from three different
   * namespaces:
@@ -643,6 +646,14 @@ struct xps_dev_maps {
      (nr_cpu_ids * sizeof(struct xps_map *)))
  #endif /* CONFIG_XPS */
  
+#define TC_MAX_QUEUE   16
+#define TC_BITMASK     15
+/* HW offloaded queuing disciplines txq count and offset maps */
+struct netdev_tc_txq {
+       u16 count;
+       u16 offset;
+};
+
  /*
   * This structure defines the management hooks for network devices.
   * The following hooks can be defined; unless noted otherwise, they are
@@ -753,6 +764,11 @@ struct xps_dev_maps {
   * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
   *                       struct nlattr *port[]);
   * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
+ *     Called to setup 'tc' number of traffic classes in the net device. This
+ *     is always called from the stack with the rtnl lock held and netif tx
+ *     queues stopped. This allows the netdevice to perform queue management
+ *     safely.
   */
  #define HAVE_NET_DEVICE_OPS
  struct net_device_ops {
@@ -811,6 +827,7 @@ struct net_device_ops {
                                                    struct nlattr *port[]);
         int                     (*ndo_get_vf_port)(struct net_device *dev,
                                                    int vf, struct sk_buff *skb);
+       int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         int                     (*ndo_fcoe_enable)(struct net_device *dev);
         int                     (*ndo_fcoe_disable)(struct net_device *dev);
@@ -1143,6 +1160,9 @@ struct net_device {
         /* Data Center Bridging netlink ops */
         const struct dcbnl_rtnl_ops *dcbnl_ops;
  #endif
+       u8 num_tc;
+       struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+       u8 prio_tc_map[TC_BITMASK + 1];
  
  #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         /* max exchange id for FCoE LRO by ddp */
@@ -1153,11 +1173,65 @@ struct net_device {
  
         /* phy device may attach itself for hardware timestamping */
         struct phy_device *phydev;
+
+       /* group the device belongs to */
+       int group;
  };
  #define to_net_dev(d) container_of(d, struct net_device, dev)
  
  #define        NETDEV_ALIGN            32
  
+static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+       return dev->prio_tc_map[prio & TC_BITMASK];
+}
+
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+       dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+       return 0;
+}
+
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+       dev->num_tc = 0;
+       memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+       memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+       dev->tc_to_txq[tc].count = count;
+       dev->tc_to_txq[tc].offset = offset;
+       return 0;
+}
+
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+       if (num_tc > TC_MAX_QUEUE)
+               return -EINVAL;
+
+       dev->num_tc = num_tc;
+       return 0;
+}
+
+static inline
+int netdev_get_num_tc(struct net_device *dev)
+{
+       return dev->num_tc;
+}
+
  static inline
  struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
                                          unsigned int index)
@@ -1844,6 +1918,7 @@ extern int                dev_set_alias(struct net_device *, const char *, size_t);
  extern int             dev_change_net_namespace(struct net_device *,
                                                  struct net *, const char *);
  extern int             dev_set_mtu(struct net_device *, int);
+extern void            dev_set_group(struct net_device *, int);
  extern int             dev_set_mac_address(struct net_device *,
                                             struct sockaddr *);
  extern int             dev_hard_start_xmit(struct sk_buff *skb,
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h

index 1893837..eeec00a 100644 (file)
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -24,16 +24,20 @@
  #define NF_MAX_VERDICT NF_STOP
  
  /* we overload the higher bits for encoding auxiliary data such as the queue
- * number. Not nice, but better than additional function arguments. */
-#define NF_VERDICT_MASK 0x0000ffff
-#define NF_VERDICT_BITS 16
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS   0x00008000
  
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
  #define NF_VERDICT_QMASK 0xffff0000
  #define NF_VERDICT_QBITS 16
  
-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
  
-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+#define NF_DROP_ERR(x) (((-(x)) << 16) | NF_DROP)
  
  /* only for userspace compatibility */
  #ifndef __KERNEL__
@@ -41,6 +45,9 @@
     <= 0x2000 is used for protocol-flags. */
  #define NFC_UNKNOWN 0x4000
  #define NFC_ALTERED 0x8000
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
  #endif
  
  enum nf_inet_hooks {
@@ -72,6 +79,10 @@ union nf_inet_addr {
  
  #ifdef __KERNEL__
  #ifdef CONFIG_NETFILTER
+static inline int NF_DROP_GETERR(int verdict)
+{
+       return -(verdict >> NF_VERDICT_QBITS);
+}
  
  static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
                                    const union nf_inet_addr *a2)
@@ -267,7 +278,7 @@ struct nf_afinfo {
         int             route_key_size;
  };
  
-extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
+extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
  static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
  {
         return rcu_dereference(nf_afinfo[family]);
@@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
  #endif /*CONFIG_NETFILTER*/
  
  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
  extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
-extern void (*nf_ct_destroy)(struct nf_conntrack *);
+extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
  #else
  static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
  #endif
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild

index 9d40eff..89c0d1e 100644 (file)
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h
  header-y += nfnetlink_log.h
  header-y += nfnetlink_queue.h
  header-y += x_tables.h
+header-y += xt_AUDIT.h
  header-y += xt_CHECKSUM.h
  header-y += xt_CLASSIFY.h
  header-y += xt_CONNMARK.h
@@ -55,6 +56,7 @@ header-y += xt_rateest.h
  header-y += xt_realm.h
  header-y += xt_recent.h
  header-y += xt_sctp.h
+header-y += xt_socket.h
  header-y += xt_state.h
  header-y += xt_statistic.h
  header-y += xt_string.h
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h

new file mode 100644 (file)

index 0000000..064bc63
--- /dev/null
+++ b/include/linux/netfilter/nf_conntrack_snmp.h
@@ -0,0 +1,9 @@
+#ifndef _NF_CONNTRACK_SNMP_H
+#define _NF_CONNTRACK_SNMP_H
+
+extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+                               unsigned int protoff,
+                               struct nf_conn *ct,
+                               enum ip_conntrack_info ctinfo);
+
+#endif /* _NF_CONNTRACK_SNMP_H */
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h

index 19711e3..debf1ae 100644 (file)
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -42,6 +42,7 @@ enum ctattr_type {
         CTA_SECMARK,            /* obsolete */
         CTA_ZONE,
         CTA_SECCTX,
+       CTA_TIMESTAMP,
         __CTA_MAX
  };
  #define CTA_MAX (__CTA_MAX - 1)
@@ -127,6 +128,14 @@ enum ctattr_counters {
  };
  #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
  
+enum ctattr_tstamp {
+       CTA_TIMESTAMP_UNSPEC,
+       CTA_TIMESTAMP_START,
+       CTA_TIMESTAMP_STOP,
+       __CTA_TIMESTAMP_MAX
+};
+#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
+
  enum ctattr_nat {
         CTA_NAT_UNSPEC,
         CTA_NAT_MINIP,
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h

index 6712e71..3721952 100644 (file)
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -611,8 +611,9 @@ struct _compat_xt_align {
  extern void xt_compat_lock(u_int8_t af);
  extern void xt_compat_unlock(u_int8_t af);
  
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
+extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
  extern void xt_compat_flush_offsets(u_int8_t af);
+extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
  extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
  
  extern int xt_compat_match_offset(const struct xt_match *match);
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h

new file mode 100644 (file)

index 0000000..38751d2
--- /dev/null
+++ b/include/linux/netfilter/xt_AUDIT.h
@@ -0,0 +1,30 @@
+/*
+ * Header file for iptables xt_AUDIT target
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _XT_AUDIT_TARGET_H
+#define _XT_AUDIT_TARGET_H
+
+#include <linux/types.h>
+
+enum {
+       XT_AUDIT_TYPE_ACCEPT = 0,
+       XT_AUDIT_TYPE_DROP,
+       XT_AUDIT_TYPE_REJECT,
+       __XT_AUDIT_TYPE_MAX,
+};
+
+#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)
+
+struct xt_audit_info {
+       __u8 type; /* XT_AUDIT_TYPE_* */
+};
+
+#endif /* _XT_AUDIT_TARGET_H */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h

index 1b56410..b56e768 100644 (file)
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -1,14 +1,16 @@
  #ifndef _XT_CT_H
  #define _XT_CT_H
  
+#include <linux/types.h>
+
  #define XT_CT_NOTRACK  0x1
  
  struct xt_ct_target_info {
-       u_int16_t       flags;
-       u_int16_t       zone;
-       u_int32_t       ct_events;
-       u_int32_t       exp_events;
-       char            helper[16];
+       __u16 flags;
+       __u16 zone;
+       __u32 ct_events;
+       __u32 exp_events;
+       char helper[16];
  
         /* Used internally by the kernel */
         struct nf_conn  *ct __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/linux/netfilter/xt_NFQUEUE.h

index 2584f4a..9eafdbb 100644 (file)
--- a/include/linux/netfilter/xt_NFQUEUE.h
+++ b/include/linux/netfilter/xt_NFQUEUE.h
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 {
         __u16 queues_total;
  };
  
+struct xt_NFQ_info_v2 {
+       __u16 queuenum;
+       __u16 queues_total;
+       __u16 bypass;
+};
+
  #endif /* _XT_NFQ_TARGET_H */
diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/linux/netfilter/xt_TCPOPTSTRIP.h

index 2db5432..7157318 100644 (file)
--- a/include/linux/netfilter/xt_TCPOPTSTRIP.h
+++ b/include/linux/netfilter/xt_TCPOPTSTRIP.h
@@ -1,13 +1,15 @@
  #ifndef _XT_TCPOPTSTRIP_H
  #define _XT_TCPOPTSTRIP_H
  
+#include <linux/types.h>
+
  #define tcpoptstrip_set_bit(bmap, idx) \
         (bmap[(idx) >> 5] |= 1U << (idx & 31))
  #define tcpoptstrip_test_bit(bmap, idx) \
         (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
  
  struct xt_tcpoptstrip_target_info {
-       u_int32_t strip_bmap[8];
+       __u32 strip_bmap[8];
  };
  
  #endif /* _XT_TCPOPTSTRIP_H */
diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h

index 3f3d693..902043c 100644 (file)
--- a/include/linux/netfilter/xt_TPROXY.h
+++ b/include/linux/netfilter/xt_TPROXY.h
@@ -1,19 +1,21 @@
  #ifndef _XT_TPROXY_H
  #define _XT_TPROXY_H
  
+#include <linux/types.h>
+
  /* TPROXY target is capable of marking the packet to perform
   * redirection. We can get rid of that whenever we get support for
   * mutliple targets in the same rule. */
  struct xt_tproxy_target_info {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
         __be32 laddr;
         __be16 lport;
  };
  
  struct xt_tproxy_target_info_v1 {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
         union nf_inet_addr laddr;
         __be16 lport;
  };
diff --git a/include/linux/netfilter/xt_cluster.h b/include/linux/netfilter/xt_cluster.h

index 8866826..9b883c8 100644 (file)
--- a/include/linux/netfilter/xt_cluster.h
+++ b/include/linux/netfilter/xt_cluster.h
@@ -1,15 +1,17 @@
  #ifndef _XT_CLUSTER_MATCH_H
  #define _XT_CLUSTER_MATCH_H
  
+#include <linux/types.h>
+
  enum xt_cluster_flags {
         XT_CLUSTER_F_INV        = (1 << 0)
  };
  
  struct xt_cluster_match_info {
-       u_int32_t               total_nodes;
-       u_int32_t               node_mask;
-       u_int32_t               hash_seed;
-       u_int32_t               flags;
+       __u32 total_nodes;
+       __u32 node_mask;
+       __u32 hash_seed;
+       __u32 flags;
  };
  
  #define XT_CLUSTER_NODES_MAX   32
diff --git a/include/linux/netfilter/xt_comment.h b/include/linux/netfilter/xt_comment.h

index eacfedc..0ea5e79 100644 (file)
--- a/include/linux/netfilter/xt_comment.h
+++ b/include/linux/netfilter/xt_comment.h
@@ -4,7 +4,7 @@
  #define XT_MAX_COMMENT_LEN 256
  
  struct xt_comment_info {
-       unsigned char comment[XT_MAX_COMMENT_LEN];
+       char comment[XT_MAX_COMMENT_LEN];
  };
  
  #endif /* XT_COMMENT_H */
diff --git a/include/linux/netfilter/xt_connlimit.h b/include/linux/netfilter/xt_connlimit.h

index 7e3284b..0ca66e9 100644 (file)
--- a/include/linux/netfilter/xt_connlimit.h
+++ b/include/linux/netfilter/xt_connlimit.h
@@ -1,8 +1,15 @@
  #ifndef _XT_CONNLIMIT_H
  #define _XT_CONNLIMIT_H
  
+#include <linux/types.h>
+
  struct xt_connlimit_data;
  
+enum {
+       XT_CONNLIMIT_INVERT = 1 << 0,
+       XT_CONNLIMIT_DADDR  = 1 << 1,
+};
+
  struct xt_connlimit_info {
         union {
                 union nf_inet_addr mask;
@@ -13,7 +20,14 @@ struct xt_connlimit_info {
                 };
  #endif
         };
-       unsigned int limit, inverse;
+       unsigned int limit;
+       union {
+               /* revision 0 */
+               unsigned int inverse;
+
+               /* revision 1 */
+               __u32 flags;
+       };
  
         /* Used internally by the kernel */
         struct xt_connlimit_data *data __attribute__((aligned(8)));
diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h

index 54f47a2..74b904d 100644 (file)
--- a/include/linux/netfilter/xt_conntrack.h
+++ b/include/linux/netfilter/xt_conntrack.h
@@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 {
         __u16 state_mask, status_mask;
  };
  
+struct xt_conntrack_mtinfo3 {
+       union nf_inet_addr origsrc_addr, origsrc_mask;
+       union nf_inet_addr origdst_addr, origdst_mask;
+       union nf_inet_addr replsrc_addr, replsrc_mask;
+       union nf_inet_addr repldst_addr, repldst_mask;
+       __u32 expires_min, expires_max;
+       __u16 l4proto;
+       __u16 origsrc_port, origdst_port;
+       __u16 replsrc_port, repldst_port;
+       __u16 match_flags, invert_flags;
+       __u16 state_mask, status_mask;
+       __u16 origsrc_port_high, origdst_port_high;
+       __u16 replsrc_port_high, repldst_port_high;
+};
+
  #endif /*_XT_CONNTRACK_H*/
diff --git a/include/linux/netfilter/xt_quota.h b/include/linux/netfilter/xt_quota.h

index b0d28c6..ca6e03e 100644 (file)
--- a/include/linux/netfilter/xt_quota.h
+++ b/include/linux/netfilter/xt_quota.h
@@ -1,6 +1,8 @@
  #ifndef _XT_QUOTA_H
  #define _XT_QUOTA_H
  
+#include <linux/types.h>
+
  enum xt_quota_flags {
         XT_QUOTA_INVERT         = 0x1,
  };
@@ -9,9 +11,9 @@ enum xt_quota_flags {
  struct xt_quota_priv;
  
  struct xt_quota_info {
-       u_int32_t               flags;
-       u_int32_t               pad;
-       aligned_u64             quota;
+       __u32 flags;
+       __u32 pad;
+       aligned_u64 quota;
  
         /* Used internally by the kernel */
         struct xt_quota_priv    *master;
diff --git a/include/linux/netfilter/xt_socket.h b/include/linux/netfilter/xt_socket.h

index 6f475b8..26d7217 100644 (file)
--- a/include/linux/netfilter/xt_socket.h
+++ b/include/linux/netfilter/xt_socket.h
@@ -1,6 +1,8 @@
  #ifndef _XT_SOCKET_H
  #define _XT_SOCKET_H
  
+#include <linux/types.h>
+
  enum {
         XT_SOCKET_TRANSPARENT = 1 << 0,
  };
diff --git a/include/linux/netfilter/xt_time.h b/include/linux/netfilter/xt_time.h

index 14b6df4..7c37fac 100644 (file)
--- a/include/linux/netfilter/xt_time.h
+++ b/include/linux/netfilter/xt_time.h
@@ -1,14 +1,16 @@
  #ifndef _XT_TIME_H
  #define _XT_TIME_H 1
  
+#include <linux/types.h>
+
  struct xt_time_info {
-       u_int32_t date_start;
-       u_int32_t date_stop;
-       u_int32_t daytime_start;
-       u_int32_t daytime_stop;
-       u_int32_t monthdays_match;
-       u_int8_t weekdays_match;
-       u_int8_t flags;
+       __u32 date_start;
+       __u32 date_stop;
+       __u32 daytime_start;
+       __u32 daytime_stop;
+       __u32 monthdays_match;
+       __u8 weekdays_match;
+       __u8 flags;
  };
  
  enum {
diff --git a/include/linux/netfilter/xt_u32.h b/include/linux/netfilter/xt_u32.h

index 9947f56..04d1bfe 100644 (file)
--- a/include/linux/netfilter/xt_u32.h
+++ b/include/linux/netfilter/xt_u32.h
@@ -1,6 +1,8 @@
  #ifndef _XT_U32_H
  #define _XT_U32_H 1
  
+#include <linux/types.h>
+
  enum xt_u32_ops {
         XT_U32_AND,
         XT_U32_LEFTSH,
@@ -9,13 +11,13 @@ enum xt_u32_ops {
  };
  
  struct xt_u32_location_element {
-       u_int32_t number;
-       u_int8_t nextop;
+       __u32 number;
+       __u8 nextop;
  };
  
  struct xt_u32_value_element {
-       u_int32_t min;
-       u_int32_t max;
+       __u32 min;
+       __u32 max;
  };
  
  /*
@@ -27,14 +29,14 @@ struct xt_u32_value_element {
  struct xt_u32_test {
         struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
         struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
-       u_int8_t nnums;
-       u_int8_t nvalues;
+       __u8 nnums;
+       __u8 nvalues;
  };
  
  struct xt_u32 {
         struct xt_u32_test tests[XT_U32_MAXSIZE+1];
-       u_int8_t ntests;
-       u_int8_t invert;
+       __u8 ntests;
+       __u8 invert;
  };
  
  #endif /* _XT_U32_H */
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h

index c73ef0b..be5be15 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_802_3_H
  #define __LINUX_BRIDGE_EBT_802_3_H
  
+#include <linux/types.h>
+
  #define EBT_802_3_SAP 0x01
  #define EBT_802_3_TYPE 0x02
  
@@ -24,24 +26,24 @@
  
  /* ui has one byte ctrl, ni has two */
  struct hdr_ui {
-       uint8_t dsap;
-       uint8_t ssap;
-       uint8_t ctrl;
-       uint8_t orig[3];
+       __u8 dsap;
+       __u8 ssap;
+       __u8 ctrl;
+       __u8 orig[3];
         __be16 type;
  };
  
  struct hdr_ni {
-       uint8_t dsap;
-       uint8_t ssap;
+       __u8 dsap;
+       __u8 ssap;
         __be16 ctrl;
-       uint8_t  orig[3];
+       __u8  orig[3];
         __be16 type;
  };
  
  struct ebt_802_3_hdr {
-       uint8_t  daddr[6];
-       uint8_t  saddr[6];
+       __u8  daddr[6];
+       __u8  saddr[6];
         __be16 len;
         union {
                 struct hdr_ui ui;
@@ -59,10 +61,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
  #endif
  
  struct ebt_802_3_info {
-       uint8_t  sap;
+       __u8  sap;
         __be16 type;
-       uint8_t  bitmask;
-       uint8_t  invflags;
+       __u8  bitmask;
+       __u8  invflags;
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h

index 0009558..bd4e3ad 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_among.h
+++ b/include/linux/netfilter_bridge/ebt_among.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_AMONG_H
  #define __LINUX_BRIDGE_EBT_AMONG_H
  
+#include <linux/types.h>
+
  #define EBT_AMONG_DST 0x01
  #define EBT_AMONG_SRC 0x02
  
@@ -30,7 +32,7 @@
   */
  
  struct ebt_mac_wormhash_tuple {
-       uint32_t cmp[2];
+       __u32 cmp[2];
         __be32 ip;
  };
  
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h

index cbf4843..522f3e4 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_ARP_H
  #define __LINUX_BRIDGE_EBT_ARP_H
  
+#include <linux/types.h>
+
  #define EBT_ARP_OPCODE 0x01
  #define EBT_ARP_HTYPE 0x02
  #define EBT_ARP_PTYPE 0x04
@@ -27,8 +29,8 @@ struct ebt_arp_info
         unsigned char smmsk[ETH_ALEN];
         unsigned char dmaddr[ETH_ALEN];
         unsigned char dmmsk[ETH_ALEN];
-       uint8_t  bitmask;
-       uint8_t  invflags;
+       __u8  bitmask;
+       __u8  invflags;
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h

index 6a708fb..c4bbc41 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_ip.h
+++ b/include/linux/netfilter_bridge/ebt_ip.h
@@ -15,6 +15,8 @@
  #ifndef __LINUX_BRIDGE_EBT_IP_H
  #define __LINUX_BRIDGE_EBT_IP_H
  
+#include <linux/types.h>
+
  #define EBT_IP_SOURCE 0x01
  #define EBT_IP_DEST 0x02
  #define EBT_IP_TOS 0x04
@@ -31,12 +33,12 @@ struct ebt_ip_info {
         __be32 daddr;
         __be32 smsk;
         __be32 dmsk;
-       uint8_t  tos;
-       uint8_t  protocol;
-       uint8_t  bitmask;
-       uint8_t  invflags;
-       uint16_t sport[2];
-       uint16_t dport[2];
+       __u8  tos;
+       __u8  protocol;
+       __u8  bitmask;
+       __u8  invflags;
+       __u16 sport[2];
+       __u16 dport[2];
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h

index e5de987..42b8896 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -12,14 +12,19 @@
  #ifndef __LINUX_BRIDGE_EBT_IP6_H
  #define __LINUX_BRIDGE_EBT_IP6_H
  
+#include <linux/types.h>
+
  #define EBT_IP6_SOURCE 0x01
  #define EBT_IP6_DEST 0x02
  #define EBT_IP6_TCLASS 0x04
  #define EBT_IP6_PROTO 0x08
  #define EBT_IP6_SPORT 0x10
  #define EBT_IP6_DPORT 0x20
+#define EBT_IP6_ICMP6 0x40
+
  #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
-                     EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT)
+                     EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \
+                     EBT_IP6_ICMP6)
  #define EBT_IP6_MATCH "ip6"
  
  /* the same values are used for the invflags */
@@ -28,12 +33,18 @@ struct ebt_ip6_info {
         struct in6_addr daddr;
         struct in6_addr smsk;
         struct in6_addr dmsk;
-       uint8_t  tclass;
-       uint8_t  protocol;
-       uint8_t  bitmask;
-       uint8_t  invflags;
-       uint16_t sport[2];
-       uint16_t dport[2];
+       __u8  tclass;
+       __u8  protocol;
+       __u8  bitmask;
+       __u8  invflags;
+       union {
+               __u16 sport[2];
+               __u8 icmpv6_type[2];
+       };
+       union {
+               __u16 dport[2];
+               __u8 icmpv6_code[2];
+       };
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h

index 4bf76b7..66d80b3 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_limit.h
+++ b/include/linux/netfilter_bridge/ebt_limit.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_LIMIT_H
  #define __LINUX_BRIDGE_EBT_LIMIT_H
  
+#include <linux/types.h>
+
  #define EBT_LIMIT_MATCH "limit"
  
  /* timings are in milliseconds. */
@@ -10,13 +12,13 @@
     seconds, or one every 59 hours. */
  
  struct ebt_limit_info {
-       u_int32_t avg;    /* Average secs between packets * scale */
-       u_int32_t burst;  /* Period multiplier for upper limit. */
+       __u32 avg;    /* Average secs between packets * scale */
+       __u32 burst;  /* Period multiplier for upper limit. */
  
         /* Used internally by the kernel */
         unsigned long prev;
-       u_int32_t credit;
-       u_int32_t credit_cap, cost;
+       __u32 credit;
+       __u32 credit_cap, cost;
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h

index cc2cdfb..7e7f1d1 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_LOG_H
  #define __LINUX_BRIDGE_EBT_LOG_H
  
+#include <linux/types.h>
+
  #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */
  #define EBT_LOG_ARP 0x02
  #define EBT_LOG_NFLOG 0x04
@@ -10,9 +12,9 @@
  #define EBT_LOG_WATCHER "log"
  
  struct ebt_log_info {
-       uint8_t loglevel;
-       uint8_t prefix[EBT_LOG_PREFIX_SIZE];
-       uint32_t bitmask;
+       __u8 loglevel;
+       __u8 prefix[EBT_LOG_PREFIX_SIZE];
+       __u32 bitmask;
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h

index 9ceb10e..410f9e5 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_mark_m.h
+++ b/include/linux/netfilter_bridge/ebt_mark_m.h
@@ -1,13 +1,15 @@
  #ifndef __LINUX_BRIDGE_EBT_MARK_M_H
  #define __LINUX_BRIDGE_EBT_MARK_M_H
  
+#include <linux/types.h>
+
  #define EBT_MARK_AND 0x01
  #define EBT_MARK_OR 0x02
  #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
  struct ebt_mark_m_info {
         unsigned long mark, mask;
-       uint8_t invert;
-       uint8_t bitmask;
+       __u8 invert;
+       __u8 bitmask;
  };
  #define EBT_MARK_MATCH "mark_m"
  
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h

index 0528178..df829fc 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_nflog.h
+++ b/include/linux/netfilter_bridge/ebt_nflog.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_NFLOG_H
  #define __LINUX_BRIDGE_EBT_NFLOG_H
  
+#include <linux/types.h>
+
  #define EBT_NFLOG_MASK 0x0
  
  #define EBT_NFLOG_PREFIX_SIZE 64
@@ -10,11 +12,11 @@
  #define EBT_NFLOG_DEFAULT_THRESHOLD    1
  
  struct ebt_nflog_info {
-       u_int32_t len;
-       u_int16_t group;
-       u_int16_t threshold;
-       u_int16_t flags;
-       u_int16_t pad;
+       __u32 len;
+       __u16 group;
+       __u16 threshold;
+       __u16 flags;
+       __u16 pad;
         char prefix[EBT_NFLOG_PREFIX_SIZE];
  };
  
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h

index 51a7998..c241bad 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_pkttype.h
+++ b/include/linux/netfilter_bridge/ebt_pkttype.h
@@ -1,9 +1,11 @@
  #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H
  #define __LINUX_BRIDGE_EBT_PKTTYPE_H
  
+#include <linux/types.h>
+
  struct ebt_pkttype_info {
-       uint8_t pkt_type;
-       uint8_t invert;
+       __u8 pkt_type;
+       __u8 invert;
  };
  #define EBT_PKTTYPE_MATCH "pkttype"
  
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h

index e503a0a..1025b9f 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_stp.h
+++ b/include/linux/netfilter_bridge/ebt_stp.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_STP_H
  #define __LINUX_BRIDGE_EBT_STP_H
  
+#include <linux/types.h>
+
  #define EBT_STP_TYPE           0x0001
  
  #define EBT_STP_FLAGS          0x0002
@@ -21,24 +23,24 @@
  #define EBT_STP_MATCH "stp"
  
  struct ebt_stp_config_info {
-       uint8_t flags;
-       uint16_t root_priol, root_priou;
+       __u8 flags;
+       __u16 root_priol, root_priou;
         char root_addr[6], root_addrmsk[6];
-       uint32_t root_costl, root_costu;
-       uint16_t sender_priol, sender_priou;
+       __u32 root_costl, root_costu;
+       __u16 sender_priol, sender_priou;
         char sender_addr[6], sender_addrmsk[6];
-       uint16_t portl, portu;
-       uint16_t msg_agel, msg_ageu;
-       uint16_t max_agel, max_ageu;
-       uint16_t hello_timel, hello_timeu;
-       uint16_t forward_delayl, forward_delayu;
+       __u16 portl, portu;
+       __u16 msg_agel, msg_ageu;
+       __u16 max_agel, max_ageu;
+       __u16 hello_timel, hello_timeu;
+       __u16 forward_delayl, forward_delayu;
  };
  
  struct ebt_stp_info {
-       uint8_t type;
+       __u8 type;
         struct ebt_stp_config_info config;
-       uint16_t bitmask;
-       uint16_t invflags;
+       __u16 bitmask;
+       __u16 invflags;
  };
  
  #endif
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h

index b677e26..89a6bec 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_ulog.h
+++ b/include/linux/netfilter_bridge/ebt_ulog.h
@@ -1,6 +1,8 @@
  #ifndef _EBT_ULOG_H
  #define _EBT_ULOG_H
  
+#include <linux/types.h>
+
  #define EBT_ULOG_DEFAULT_NLGROUP 0
  #define EBT_ULOG_DEFAULT_QTHRESHOLD 1
  #define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */
@@ -10,7 +12,7 @@
  #define EBT_ULOG_VERSION 1
  
  struct ebt_ulog_info {
-       uint32_t nlgroup;
+       __u32 nlgroup;
         unsigned int cprange;
         unsigned int qthreshold;
         char prefix[EBT_ULOG_PREFIX_LEN];
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h

index 1d98be4..967d1d5 100644 (file)
--- a/include/linux/netfilter_bridge/ebt_vlan.h
+++ b/include/linux/netfilter_bridge/ebt_vlan.h
@@ -1,6 +1,8 @@
  #ifndef __LINUX_BRIDGE_EBT_VLAN_H
  #define __LINUX_BRIDGE_EBT_VLAN_H
  
+#include <linux/types.h>
+
  #define EBT_VLAN_ID    0x01
  #define EBT_VLAN_PRIO  0x02
  #define EBT_VLAN_ENCAP 0x04
@@ -8,12 +10,12 @@
  #define EBT_VLAN_MATCH "vlan"
  
  struct ebt_vlan_info {
-       uint16_t id;            /* VLAN ID {1-4095} */
-       uint8_t prio;           /* VLAN User Priority {0-7} */
+       __u16 id;               /* VLAN ID {1-4095} */
+       __u8 prio;              /* VLAN User Priority {0-7} */
         __be16 encap;           /* VLAN Encapsulated frame code {0-65535} */
-       uint8_t bitmask;                /* Args bitmask bit 1=1 - ID arg,
+       __u8 bitmask;           /* Args bitmask bit 1=1 - ID arg,
                                    bit 2=1 User-Priority arg, bit 3=1 encap*/
-       uint8_t invflags;               /* Inverse bitmask  bit 1=1 - inversed ID arg, 
+       __u8 invflags;          /* Inverse bitmask  bit 1=1 - inversed ID arg, 
                                    bit 2=1 - inversed Pirority arg */
  };
  
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h

index e5a3687..c6a204c 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -1,6 +1,8 @@
  #ifndef _IPT_CLUSTERIP_H_target
  #define _IPT_CLUSTERIP_H_target
  
+#include <linux/types.h>
+
  enum clusterip_hashmode {
      CLUSTERIP_HASHMODE_SIP = 0,
      CLUSTERIP_HASHMODE_SIP_SPT,
@@ -17,15 +19,15 @@ struct clusterip_config;
  
  struct ipt_clusterip_tgt_info {
  
-       u_int32_t flags;
+       __u32 flags;
  
         /* only relevant for new ones */
-       u_int8_t clustermac[6];
-       u_int16_t num_total_nodes;
-       u_int16_t num_local_nodes;
-       u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
-       u_int32_t hash_mode;
-       u_int32_t hash_initval;
+       __u8 clustermac[6];
+       __u16 num_total_nodes;
+       __u16 num_local_nodes;
+       __u16 local_nodes[CLUSTERIP_MAX_NODES];
+       __u32 hash_mode;
+       __u32 hash_initval;
  
         /* Used internally by the kernel */
         struct clusterip_config *config;
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h

index 7ca4591..bb88d53 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_ECN.h
+++ b/include/linux/netfilter_ipv4/ipt_ECN.h
@@ -8,6 +8,8 @@
  */
  #ifndef _IPT_ECN_TARGET_H
  #define _IPT_ECN_TARGET_H
+
+#include <linux/types.h>
  #include <linux/netfilter/xt_DSCP.h>
  
  #define IPT_ECN_IP_MASK        (~XT_DSCP_MASK)
@@ -19,11 +21,11 @@
  #define IPT_ECN_OP_MASK                0xce
  
  struct ipt_ECN_info {
-       u_int8_t operation;     /* bitset of operations */
-       u_int8_t ip_ect;        /* ECT codepoint of IPv4 header, pre-shifted */
+       __u8 operation; /* bitset of operations */
+       __u8 ip_ect;    /* ECT codepoint of IPv4 header, pre-shifted */
         union {
                 struct {
-                       u_int8_t ece:1, cwr:1; /* TCP ECT bits */
+                       __u8 ece:1, cwr:1; /* TCP ECT bits */
                 } tcp;
         } proto;
  };
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h

index 2529660..5bca782 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -1,15 +1,17 @@
  #ifndef _IPT_SAME_H
  #define _IPT_SAME_H
  
+#include <linux/types.h>
+
  #define IPT_SAME_MAX_RANGE     10
  
  #define IPT_SAME_NODST         0x01
  
  struct ipt_same_info {
         unsigned char info;
-       u_int32_t rangesize;
-       u_int32_t ipnum;
-       u_int32_t *iparray;
+       __u32 rangesize;
+       __u32 ipnum;
+       __u32 *iparray;
  
         /* hangs off end. */
         struct nf_nat_range range[IPT_SAME_MAX_RANGE];
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h

index ee6611e..f6ac169 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_TTL.h
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -4,6 +4,8 @@
  #ifndef _IPT_TTL_H
  #define _IPT_TTL_H
  
+#include <linux/types.h>
+
  enum {
         IPT_TTL_SET = 0,
         IPT_TTL_INC,
@@ -13,8 +15,8 @@ enum {
  #define IPT_TTL_MAXMODE        IPT_TTL_DEC
  
  struct ipt_TTL_info {
-       u_int8_t        mode;
-       u_int8_t        ttl;
+       __u8    mode;
+       __u8    ttl;
  };
  
  
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h

index 446de6a..0da4223 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_addrtype.h
+++ b/include/linux/netfilter_ipv4/ipt_addrtype.h
@@ -1,6 +1,8 @@
  #ifndef _IPT_ADDRTYPE_H
  #define _IPT_ADDRTYPE_H
  
+#include <linux/types.h>
+
  enum {
         IPT_ADDRTYPE_INVERT_SOURCE      = 0x0001,
         IPT_ADDRTYPE_INVERT_DEST        = 0x0002,
@@ -9,17 +11,17 @@ enum {
  };
  
  struct ipt_addrtype_info_v1 {
-       u_int16_t       source;         /* source-type mask */
-       u_int16_t       dest;           /* dest-type mask */
-       u_int32_t       flags;
+       __u16   source;         /* source-type mask */
+       __u16   dest;           /* dest-type mask */
+       __u32   flags;
  };
  
  /* revision 0 */
  struct ipt_addrtype_info {
-       u_int16_t       source;         /* source-type mask */
-       u_int16_t       dest;           /* dest-type mask */
-       u_int32_t       invert_source;
-       u_int32_t       invert_dest;
+       __u16   source;         /* source-type mask */
+       __u16   dest;           /* dest-type mask */
+       __u32   invert_source;
+       __u32   invert_dest;
  };
  
  #endif
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h

index 2e555b4..4e02bb0 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_ah.h
+++ b/include/linux/netfilter_ipv4/ipt_ah.h
@@ -1,9 +1,11 @@
  #ifndef _IPT_AH_H
  #define _IPT_AH_H
  
+#include <linux/types.h>
+
  struct ipt_ah {
-       u_int32_t spis[2];                      /* Security Parameter Index */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 spis[2];                  /* Security Parameter Index */
+       __u8  invflags;                 /* Inverse flags */
  };
  
  
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h

index 9945baa..eabf95f 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -8,6 +8,8 @@
  */
  #ifndef _IPT_ECN_H
  #define _IPT_ECN_H
+
+#include <linux/types.h>
  #include <linux/netfilter/xt_dscp.h>
  
  #define IPT_ECN_IP_MASK        (~XT_DSCP_MASK)
@@ -20,12 +22,12 @@
  
  /* match info */
  struct ipt_ecn_info {
-       u_int8_t operation;
-       u_int8_t invert;
-       u_int8_t ip_ect;
+       __u8 operation;
+       __u8 invert;
+       __u8 ip_ect;
         union {
                 struct {
-                       u_int8_t ect;
+                       __u8 ect;
                 } tcp;
         } proto;
  };
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h

index ee24fd8..37bee44 100644 (file)
--- a/include/linux/netfilter_ipv4/ipt_ttl.h
+++ b/include/linux/netfilter_ipv4/ipt_ttl.h
@@ -4,6 +4,8 @@
  #ifndef _IPT_TTL_H
  #define _IPT_TTL_H
  
+#include <linux/types.h>
+
  enum {
         IPT_TTL_EQ = 0,         /* equals */
         IPT_TTL_NE,             /* not equals */
@@ -13,8 +15,8 @@ enum {
  
  
  struct ipt_ttl_info {
-       u_int8_t        mode;
-       u_int8_t        ttl;
+       __u8    mode;
+       __u8    ttl;
  };
  
  
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h

index afb7813..ebd8ead 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_HL.h
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -5,6 +5,8 @@
  #ifndef _IP6T_HL_H
  #define _IP6T_HL_H
  
+#include <linux/types.h>
+
  enum {
         IP6T_HL_SET = 0,
         IP6T_HL_INC,
@@ -14,8 +16,8 @@ enum {
  #define IP6T_HL_MAXMODE        IP6T_HL_DEC
  
  struct ip6t_HL_info {
-       u_int8_t        mode;
-       u_int8_t        hop_limit;
+       __u8    mode;
+       __u8    hop_limit;
  };
  
  
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h

index 6be6504..205ed62 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_REJECT.h
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -1,6 +1,8 @@
  #ifndef _IP6T_REJECT_H
  #define _IP6T_REJECT_H
  
+#include <linux/types.h>
+
  enum ip6t_reject_with {
         IP6T_ICMP6_NO_ROUTE,
         IP6T_ICMP6_ADM_PROHIBITED,
@@ -12,7 +14,7 @@ enum ip6t_reject_with {
  };
  
  struct ip6t_reject_info {
-       u_int32_t       with;   /* reject type */
+       __u32   with;   /* reject type */
  };
  
  #endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h

index 17a745c..5da2b65 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_ah.h
+++ b/include/linux/netfilter_ipv6/ip6t_ah.h
@@ -1,11 +1,13 @@
  #ifndef _IP6T_AH_H
  #define _IP6T_AH_H
  
+#include <linux/types.h>
+
  struct ip6t_ah {
-       u_int32_t spis[2];                      /* Security Parameter Index */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  hdrres;                       /* Test of the Reserved Filed */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 spis[2];                  /* Security Parameter Index */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  hdrres;                   /* Test of the Reserved Field */
+       __u8  invflags;                 /* Inverse flags */
  };
  
  #define IP6T_AH_SPI 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h

index 3724d08..b47f61b 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/linux/netfilter_ipv6/ip6t_frag.h
@@ -1,11 +1,13 @@
  #ifndef _IP6T_FRAG_H
  #define _IP6T_FRAG_H
  
+#include <linux/types.h>
+
  struct ip6t_frag {
-       u_int32_t ids[2];                       /* Security Parameter Index */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  flags;                        /*  */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 ids[2];                   /* Fragment ID range (presumably min/max) */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  flags;                    /*  */
+       __u8  invflags;                 /* Inverse flags */
  };
  
  #define IP6T_FRAG_IDS          0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h

index 5ef91b8..6e76dbc 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_hl.h
+++ b/include/linux/netfilter_ipv6/ip6t_hl.h
@@ -5,6 +5,8 @@
  #ifndef _IP6T_HL_H
  #define _IP6T_HL_H
  
+#include <linux/types.h>
+
  enum {
         IP6T_HL_EQ = 0,         /* equals */
         IP6T_HL_NE,             /* not equals */
@@ -14,8 +16,8 @@ enum {
  
  
  struct ip6t_hl_info {
-       u_int8_t        mode;
-       u_int8_t        hop_limit;
+       __u8    mode;
+       __u8    hop_limit;
  };
  
  
diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h

index 01dfd44..efae3a2 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h
+++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
@@ -8,10 +8,12 @@ on whether they contain certain headers */
  #ifndef __IPV6HEADER_H
  #define __IPV6HEADER_H
  
+#include <linux/types.h>
+
  struct ip6t_ipv6header_info {
-       u_int8_t matchflags;
-       u_int8_t invflags;
-       u_int8_t modeflag;
+       __u8 matchflags;
+       __u8 invflags;
+       __u8 modeflag;
  };
  
  #define MASK_HOPOPTS    128
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h

index 18549bc..a7729a5 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_mh.h
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -1,10 +1,12 @@
  #ifndef _IP6T_MH_H
  #define _IP6T_MH_H
  
+#include <linux/types.h>
+
  /* MH matching stuff */
  struct ip6t_mh {
-       u_int8_t types[2];      /* MH type range */
-       u_int8_t invflags;      /* Inverse flags */
+       __u8 types[2];  /* MH type range */
+       __u8 invflags;  /* Inverse flags */
  };
  
  /* Values for "invflags" field in struct ip6t_mh. */
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h

index 62d89bc..17d419a 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_opts.h
+++ b/include/linux/netfilter_ipv6/ip6t_opts.h
@@ -1,14 +1,16 @@
  #ifndef _IP6T_OPTS_H
  #define _IP6T_OPTS_H
  
+#include <linux/types.h>
+
  #define IP6T_OPTS_OPTSNR 16
  
  struct ip6t_opts {
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t flags;                         /*  */
-       u_int8_t invflags;                      /* Inverse flags */
-       u_int16_t opts[IP6T_OPTS_OPTSNR];       /* opts */
-       u_int8_t optsnr;                        /* Nr of OPts */
+       __u32 hdrlen;                   /* Header Length */
+       __u8 flags;                             /*  */
+       __u8 invflags;                  /* Inverse flags */
+       __u16 opts[IP6T_OPTS_OPTSNR];   /* opts */
+       __u8 optsnr;                    /* Number of opts */
  };
  
  #define IP6T_OPTS_LEN          0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h

index ab91bfd..7605a5f 100644 (file)
--- a/include/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/linux/netfilter_ipv6/ip6t_rt.h
@@ -1,18 +1,19 @@
  #ifndef _IP6T_RT_H
  #define _IP6T_RT_H
  
+#include <linux/types.h>
  /*#include <linux/in6.h>*/
  
  #define IP6T_RT_HOPS 16
  
  struct ip6t_rt {
-       u_int32_t rt_type;                      /* Routing Type */
-       u_int32_t segsleft[2];                  /* Segments Left */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  flags;                        /*  */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 rt_type;                  /* Routing Type */
+       __u32 segsleft[2];                      /* Segments Left */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  flags;                    /*  */
+       __u8  invflags;                 /* Inverse flags */
         struct in6_addr addrs[IP6T_RT_HOPS];    /* Hops */
-       u_int8_t addrnr;                        /* Nr of Addresses */
+       __u8 addrnr;                    /* Nr of Addresses */
  };
  
  #define IP6T_RT_TYP            0x01
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h

index 2cfa4bc..776cd93 100644 (file)
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -481,4 +481,16 @@ struct tc_drr_stats {
         __u32   deficit;
  };
  
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+struct tc_mqprio_qopt {
+       __u8    num_tc;
+       __u8    prio_tc_map[TC_QOPT_BITMASK + 1];
+       __u8    hw;
+       __u16   count[TC_QOPT_MAX_QUEUE];
+       __u16   offset[TC_QOPT_MAX_QUEUE];
+};
+
  #endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

index bf221d6..6e946da 100644 (file)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1801,6 +1801,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
                      prefetch(skb->prev), (skb != (struct sk_buff *)(queue));   \
                      skb = skb->prev)
  
+#define skb_queue_reverse_walk_safe(queue, skb, tmp)                           \
+               for (skb = (queue)->prev, tmp = skb->prev;                      \
+                    skb != (struct sk_buff *)(queue);                          \
+                    skb = tmp, tmp = skb->prev)
+
+#define skb_queue_reverse_walk_from_safe(queue, skb, tmp)                      \
+               for (tmp = skb->prev;                                           \
+                    skb != (struct sk_buff *)(queue);                          \
+                    skb = tmp, tmp = skb->prev)
  
  static inline bool skb_has_frag_list(const struct sk_buff *skb)
  {
diff --git a/include/net/dst.h b/include/net/dst.h

index 93b0310..be5a0d4 100644 (file)
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -72,7 +72,7 @@ struct dst_entry {
  
         u32                     _metrics[RTAX_MAX];
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         __u32                   tclassid;
  #else
         __u32                   __pad2;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h

index 07bdb5e..65d1fcd 100644 (file)
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -55,7 +55,7 @@ struct fib_nh {
         int                     nh_weight;
         int                     nh_power;
  #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         __u32                   nh_tclassid;
  #endif
         int                     nh_oif;
@@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp,
  extern int __net_init fib4_rules_init(struct net *net);
  extern void __net_exit fib4_rules_exit(struct net *net);
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  extern u32 fib_rules_tclass(struct fib_result *res);
  #endif
  
@@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id);
  
  static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
  {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  #ifdef CONFIG_IP_MULTIPLE_TABLES
         u32 rtag;
  #endif
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h

index b7bbd6c..b23bea6 100644 (file)
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -28,6 +28,80 @@
  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
  #include <net/netfilter/nf_conntrack.h>
  #endif
+#include <net/net_namespace.h>         /* Netw namespace */
+
+/*
+ * Generic accessor for the ipvs struct of a net namespace
+ */
+static inline struct netns_ipvs *net_ipvs(struct net* net)
+{
+       return net->ipvs;
+}
+/*
+ * Get the net pointer from an skb on packet traffic paths.
+ * Use skb_sknet() when the call comes from userland (ioctl or netlink).
+ */
+static inline struct net *skb_net(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+       /*
+        * This is used for debug only.
+        * Start with the most likely hit
+        * End with BUG
+        */
+       if (likely(skb->dev && skb->dev->nd_net))
+               return dev_net(skb->dev);
+       if (skb_dst(skb)->dev)
+               return dev_net(skb_dst(skb)->dev);
+       WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
+                     __func__, __LINE__);
+       if (likely(skb->sk && skb->sk->sk_net))
+               return sock_net(skb->sk);
+       pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+               __func__, __LINE__);
+       BUG();
+#else
+       return dev_net(skb->dev ? : skb_dst(skb)->dev);
+#endif
+#else
+       return &init_net;
+#endif
+}
+
+static inline struct net *skb_sknet(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+       /* Start with the most likely hit */
+       if (likely(skb->sk && skb->sk->sk_net))
+               return sock_net(skb->sk);
+       WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
+                      __func__, __LINE__);
+       if (likely(skb->dev && skb->dev->nd_net))
+               return dev_net(skb->dev);
+       pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+               __func__, __LINE__);
+       BUG();
+#else
+       return sock_net(skb->sk);
+#endif
+#else
+       return &init_net;
+#endif
+}
+/*
+ * This one is needed for single_open_net(), since net is stored directly in
+ * seq->private rather than in a struct, i.e. seq_file_net() can't be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+       return (struct net *)seq->private;
+#else
+       return &init_net;
+#endif
+}
  
  /* Connections' size value needed by ip_vs_ctl.c */
  extern int ip_vs_conn_tab_size;
@@ -258,6 +332,23 @@ struct ip_vs_seq {
                                                    before last resized pkt */
  };
  
+/*
+ * counters per cpu
+ */
+struct ip_vs_counters {
+       __u32           conns;          /* connections scheduled */
+       __u32           inpkts;         /* incoming packets */
+       __u32           outpkts;        /* outgoing packets */
+       __u64           inbytes;        /* incoming bytes */
+       __u64           outbytes;       /* outgoing bytes */
+};
+/*
+ * Stats per cpu
+ */
+struct ip_vs_cpu_stats {
+       struct ip_vs_counters   ustats;
+       struct u64_stats_sync   syncp;
+};
  
  /*
   *     IPVS statistics objects
@@ -279,17 +370,34 @@ struct ip_vs_estimator {
  };
  
  struct ip_vs_stats {
-       struct ip_vs_stats_user ustats;         /* statistics */
+       struct ip_vs_stats_user ustats;         /* statistics */
         struct ip_vs_estimator  est;            /* estimator */
-
-       spinlock_t              lock;           /* spin lock */
+       struct ip_vs_cpu_stats  *cpustats;      /* per cpu counters */
+       spinlock_t              lock;           /* spin lock */
  };
  
+/*
+ * Helper Macros for per cpu
+ * ipvs->tot_stats->ustats.count
+ */
+#define IPVS_STAT_INC(ipvs, count)     \
+       __this_cpu_inc((ipvs)->ustats->count)
+
+#define IPVS_STAT_ADD(ipvs, count, value) \
+       do {\
+               write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
+                                    raw_smp_processor_id())); \
+               __this_cpu_add((ipvs)->ustats->count, value); \
+               write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
+                                  raw_smp_processor_id())); \
+       } while (0)
+
  struct dst_entry;
  struct iphdr;
  struct ip_vs_conn;
  struct ip_vs_app;
  struct sk_buff;
+struct ip_vs_proto_data;
  
  struct ip_vs_protocol {
         struct ip_vs_protocol   *next;
@@ -297,21 +405,22 @@ struct ip_vs_protocol {
         u16                     protocol;
         u16                     num_states;
         int                     dont_defrag;
-       atomic_t                appcnt;         /* counter of proto app incs */
-       int                     *timeout_table; /* protocol timeout table */
  
         void (*init)(struct ip_vs_protocol *pp);
  
         void (*exit)(struct ip_vs_protocol *pp);
  
+       void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+       void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
         int (*conn_schedule)(int af, struct sk_buff *skb,
-                            struct ip_vs_protocol *pp,
+                            struct ip_vs_proto_data *pd,
                              int *verdict, struct ip_vs_conn **cpp);
  
         struct ip_vs_conn *
         (*conn_in_get)(int af,
                        const struct sk_buff *skb,
-                      struct ip_vs_protocol *pp,
                        const struct ip_vs_iphdr *iph,
                        unsigned int proto_off,
                        int inverse);
@@ -319,7 +428,6 @@ struct ip_vs_protocol {
         struct ip_vs_conn *
         (*conn_out_get)(int af,
                         const struct sk_buff *skb,
-                       struct ip_vs_protocol *pp,
                         const struct ip_vs_iphdr *iph,
                         unsigned int proto_off,
                         int inverse);
@@ -337,11 +445,11 @@ struct ip_vs_protocol {
  
         int (*state_transition)(struct ip_vs_conn *cp, int direction,
                                 const struct sk_buff *skb,
-                               struct ip_vs_protocol *pp);
+                               struct ip_vs_proto_data *pd);
  
-       int (*register_app)(struct ip_vs_app *inc);
+       int (*register_app)(struct net *net, struct ip_vs_app *inc);
  
-       void (*unregister_app)(struct ip_vs_app *inc);
+       void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
  
         int (*app_conn_bind)(struct ip_vs_conn *cp);
  
@@ -350,14 +458,26 @@ struct ip_vs_protocol {
                              int offset,
                              const char *msg);
  
-       void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
+       void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
+};
  
-       int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+       struct ip_vs_proto_data *next;
+       struct ip_vs_protocol   *pp;
+       int                     *timeout_table; /* protocol timeout table */
+       atomic_t                appcnt;         /* counter of proto app incs. */
+       struct tcp_states_t     *tcp_state_table;
  };
  
-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
+                                                    unsigned short proto);
  
  struct ip_vs_conn_param {
+       struct net                      *net;
         const union nf_inet_addr        *caddr;
         const union nf_inet_addr        *vaddr;
         __be16                          cport;
@@ -375,16 +495,19 @@ struct ip_vs_conn_param {
   */
  struct ip_vs_conn {
         struct list_head        c_list;         /* hashed list heads */
-
+#ifdef CONFIG_NET_NS
+       struct net              *net;           /* Name space */
+#endif
         /* Protocol, addresses and port numbers */
-       u16                      af;            /* address family */
-       union nf_inet_addr       caddr;          /* client address */
-       union nf_inet_addr       vaddr;          /* virtual address */
-       union nf_inet_addr       daddr;          /* destination address */
-       volatile __u32           flags;          /* status flags */
-       __be16                   cport;
-       __be16                   vport;
-       __be16                   dport;
+       u16                     af;             /* address family */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __u32                   fwmark;         /* Fire wall mark from skb */
+       union nf_inet_addr      caddr;          /* client address */
+       union nf_inet_addr      vaddr;          /* virtual address */
+       union nf_inet_addr      daddr;          /* destination address */
+       volatile __u32          flags;          /* status flags */
         __u16                   protocol;       /* Which protocol (TCP/UDP) */
  
         /* counter and timer */
@@ -422,10 +545,38 @@ struct ip_vs_conn {
         struct ip_vs_seq        in_seq;         /* incoming seq. struct */
         struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
  
+       const struct ip_vs_pe   *pe;
         char                    *pe_data;
         __u8                    pe_data_len;
  };
  
+/*
+ * To save some memory in the conn table when CONFIG_NET_NS is disabled.
+ */
+static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
+{
+#ifdef CONFIG_NET_NS
+       return cp->net;
+#else
+       return &init_net;
+#endif
+}
+static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+       cp->net = net;
+#endif
+}
+
+static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
+                                   struct net *net)
+{
+#ifdef CONFIG_NET_NS
+       return cp->net == net;
+#else
+       return 1;
+#endif
+}
  
  /*
   *     Extended internal versions of struct ip_vs_service_user and
@@ -485,6 +636,7 @@ struct ip_vs_service {
         unsigned                flags;    /* service status flags */
         unsigned                timeout;  /* persistent timeout in ticks */
         __be32                  netmask;  /* grouping granularity */
+       struct net              *net;
  
         struct list_head        destinations;  /* real server d-linked list */
         __u32                   num_dests;     /* number of servers */
@@ -510,8 +662,8 @@ struct ip_vs_dest {
         struct list_head        d_list;   /* for table with all the dests */
  
         u16                     af;             /* address family */
-       union nf_inet_addr      addr;           /* IP address of the server */
         __be16                  port;           /* port number of the server */
+       union nf_inet_addr      addr;           /* IP address of the server */
         volatile unsigned       flags;          /* dest status flags */
         atomic_t                conn_flags;     /* flags to copy to conn */
         atomic_t                weight;         /* server weight */
@@ -538,8 +690,8 @@ struct ip_vs_dest {
         /* for virtual service */
         struct ip_vs_service    *svc;           /* service it belongs to */
         __u16                   protocol;       /* which protocol (TCP/UDP) */
-       union nf_inet_addr      vaddr;          /* virtual IP address */
         __be16                  vport;          /* virtual port number */
+       union nf_inet_addr      vaddr;          /* virtual IP address */
         __u32                   vfwmark;        /* firewall mark of service */
  };
  
@@ -674,13 +826,14 @@ enum {
         IP_VS_DIR_LAST,
  };
  
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
                                          const union nf_inet_addr *caddr,
                                          __be16 cport,
                                          const union nf_inet_addr *vaddr,
                                          __be16 vport,
                                          struct ip_vs_conn_param *p)
  {
+       p->net = net;
         p->af = af;
         p->protocol = protocol;
         p->caddr = caddr;
@@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
  struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
  
  struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-                                           struct ip_vs_protocol *pp,
                                             const struct ip_vs_iphdr *iph,
                                             unsigned int proto_off,
                                             int inverse);
@@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
  struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
  
  struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-                                            struct ip_vs_protocol *pp,
                                              const struct ip_vs_iphdr *iph,
                                              unsigned int proto_off,
                                              int inverse);
@@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
  struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
                                   const union nf_inet_addr *daddr,
                                   __be16 dport, unsigned flags,
-                                 struct ip_vs_dest *dest);
+                                 struct ip_vs_dest *dest, __u32 fwmark);
  extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
  
  extern const char * ip_vs_state_name(__u16 proto, int state);
  
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
  extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_random_dropentry(struct net *net);
  extern int ip_vs_conn_init(void);
  extern void ip_vs_conn_cleanup(void);
  
@@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
   *      (from ip_vs_app.c)
   */
  #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
  extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
  extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
+                                 __u16 proto, __u16 port);
  extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
  extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
  
@@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
  void ip_vs_unbind_pe(struct ip_vs_service *svc);
  int register_ip_vs_pe(struct ip_vs_pe *pe);
  int unregister_ip_vs_pe(struct ip_vs_pe *pe);
-extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
-extern void ip_vs_pe_put(struct ip_vs_pe *pe);
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
+
+static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               __module_get(pe->module);
+}
+
+static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               module_put(pe->module);
+}
  
  /*
   *     IPVS protocol functions (from ip_vs_proto.c)
   */
  extern int ip_vs_protocol_init(void);
  extern void ip_vs_protocol_cleanup(void);
-extern void ip_vs_protocol_timeout_change(int flags);
+extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
  extern int *ip_vs_create_timeout_table(int *table, int size);
  extern int
  ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
  extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
  extern struct ip_vs_conn *
  ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-              struct ip_vs_protocol *pp, int *ignored);
+              struct ip_vs_proto_data *pd, int *ignored);
  extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-                       struct ip_vs_protocol *pp);
+                       struct ip_vs_proto_data *pd);
  
  
  /*
   *      IPVS control data and functions (from ip_vs_ctl.c)
   */
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
  extern struct ip_vs_stats ip_vs_stats;
  extern const struct ctl_path net_vs_ctl_path[];
+extern int sysctl_ip_vs_sync_ver;
  
+extern void ip_vs_sync_switch_mode(struct net *net, int mode);
  extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                   const union nf_inet_addr *vaddr, __be16 vport);
  
  static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
  }
  
  extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                           const union nf_inet_addr *daddr, __be16 dport);
  
  extern int ip_vs_use_count_inc(void);
@@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void);
  extern int ip_vs_control_init(void);
  extern void ip_vs_control_cleanup(void);
  extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
-               const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
+               __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+               __u16 protocol, __u32 fwmark);
  extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
  
  
@@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
   *      IPVS sync daemon data and function prototypes
   *      (from ip_vs_sync.c)
   */
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+                            __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);
  
  
  /*
@@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
   */
  extern int ip_vs_estimator_init(void);
  extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
  extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
  
  /*
@@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
  extern int ip_vs_drop_rate;
  extern int ip_vs_drop_counter;
  
-static __inline__ int ip_vs_todrop(void)
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
  {
-       if (!ip_vs_drop_rate) return 0;
-       if (--ip_vs_drop_counter > 0) return 0;
-       ip_vs_drop_counter = ip_vs_drop_rate;
+       if (!ipvs->drop_rate)
+               return 0;
+       if (--ipvs->drop_counter > 0)
+               return 0;
+       ipvs->drop_counter = ipvs->drop_rate;
         return 1;
  }
  
@@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
   *      Netfilter connection tracking
   *      (from ip_vs_nfct.c)
   */
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
  {
-       return sysctl_ip_vs_conntrack;
+       return ipvs->sysctl_conntrack;
  }
  
  extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
  
  #else
  
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
  {
         return 0;
  }
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h

index 1bf812b..b3b4a34 100644 (file)
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -20,6 +20,7 @@
  #include <net/netns/conntrack.h>
  #endif
  #include <net/netns/xfrm.h>
+#include <net/netns/ip_vs.h>
  
  struct proc_dir_entry;
  struct net_device;
@@ -94,6 +95,7 @@ struct net {
  #ifdef CONFIG_XFRM
         struct netns_xfrm       xfrm;
  #endif
+       struct netns_ipvs       *ipvs;
  };
  
  
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h

index d85cff1..d0d1337 100644 (file)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto {
  /* per conntrack: application helper private data */
  union nf_conntrack_help {
         /* insert conntrack helper private data (master) here */
+#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
         struct nf_ct_ftp_master ct_ftp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
+    defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
         struct nf_ct_pptp_master ct_pptp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_H323) || \
+    defined(CONFIG_NF_CONNTRACK_H323_MODULE)
         struct nf_ct_h323_master ct_h323_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SANE) || \
+    defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
         struct nf_ct_sane_master ct_sane_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
         struct nf_ct_sip_master ct_sip_info;
+#endif
  };
  
  #include <linux/types.h>
@@ -116,14 +129,14 @@ struct nf_conn {
         u_int32_t secmark;
  #endif
  
-       /* Storage reserved for other modules: */
-       union nf_conntrack_proto proto;
-
         /* Extensions */
         struct nf_ct_ext *ext;
  #ifdef CONFIG_NET_NS
         struct net *ct_net;
  #endif
+
+       /* Storage reserved for other modules, must be the last member */
+       union nf_conntrack_proto proto;
  };
  
  static inline struct nf_conn *
@@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
   * Allocate a hashtable of hlist_head (if nulls == 0),
   * or hlist_nulls_head (if nulls == 1)
   */
-extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
  
-extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
+extern void nf_ct_free_hashtable(void *hash, unsigned int size);
  
  extern struct nf_conntrack_tuple_hash *
  __nf_conntrack_find(struct net *net, u16 zone,
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h

index 96ba5f7..8fdb04b 100644 (file)
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache {
  static inline struct nf_conntrack_ecache *
  nf_ct_ecache_find(const struct nf_conn *ct)
  {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
         return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+#else
+       return NULL;
+#endif
  }
  
  static inline struct nf_conntrack_ecache *
  nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
  {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
         struct net *net = nf_ct_net(ct);
         struct nf_conntrack_ecache *e;
  
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
                 e->expmask = expmask;
         }
         return e;
+#else
+       return NULL;
+#endif
  };
  
  #ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier {
         int (*fcn)(unsigned int events, struct nf_ct_event *item);
  };
  
-extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
  extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
  extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
  
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier {
         int (*fcn)(unsigned int events, struct nf_exp_event *item);
  };
  
-extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
  extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
  extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
  
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h

index 0772d29..2dcf317 100644 (file)
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -7,10 +7,19 @@
  
  enum nf_ct_ext_id {
         NF_CT_EXT_HELPER,
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
         NF_CT_EXT_NAT,
+#endif
         NF_CT_EXT_ACCT,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
         NF_CT_EXT_ECACHE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
         NF_CT_EXT_ZONE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       NF_CT_EXT_TSTAMP,
+#endif
         NF_CT_EXT_NUM,
  };
  
@@ -19,6 +28,7 @@ enum nf_ct_ext_id {
  #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
  #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
  #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
  
  /* Extensions: optional stuff which isn't permanently in struct. */
  struct nf_ct_ext {
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h

index 32c305d..f1c1311 100644 (file)
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
  extern int nf_conntrack_helper_init(void);
  extern void nf_conntrack_helper_fini(void);
  
+extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
+                                      unsigned int protoff,
+                                      struct nf_conn *ct,
+                                      enum ip_conntrack_info ctinfo,
+                                      unsigned int timeout);
+
  #endif /*_NF_CONNTRACK_HELPER_H*/
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h

index a754761..e8010f4 100644 (file)
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto {
         struct module *me;
  };
  
-extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
  
  /* Protocol registration. */
  extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h

new file mode 100644 (file)

index 0000000..fc9c82b
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -0,0 +1,65 @@
+#ifndef _NF_CONNTRACK_TSTAMP_H
+#define _NF_CONNTRACK_TSTAMP_H
+
+#include <net/net_namespace.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_tstamp {
+       u_int64_t start;
+       u_int64_t stop;
+};
+
+static inline
+struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
+#else
+       return NULL;
+#endif
+}
+
+static inline
+struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       struct net *net = nf_ct_net(ct);
+
+       if (!net->ct.sysctl_tstamp)
+               return NULL;
+
+       return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
+#else
+       return NULL;
+#endif
+};
+
+static inline bool nf_ct_tstamp_enabled(struct net *net)
+{
+       return net->ct.sysctl_tstamp != 0;
+}
+
+static inline void nf_ct_set_tstamp(struct net *net, bool enable)
+{
+       net->ct.sysctl_tstamp = enable;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+extern int nf_conntrack_tstamp_init(struct net *net);
+extern void nf_conntrack_tstamp_fini(struct net *net);
+#else
+static inline int nf_conntrack_tstamp_init(struct net *net)
+{
+       return 0;
+}
+
+static inline void nf_conntrack_tstamp_fini(struct net *net)
+{
+       return;
+}
+#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */
+
+#endif /* _NF_CONNTRACK_TSTAMP_H */
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h

index f5f09f0..aff80b1 100644 (file)
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat {
  /* per conntrack: nat application helper private data */
  union nf_conntrack_nat_help {
         /* insert nat helper private data here */
+#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
         struct nf_nat_pptp nat_pptp_info;
+#endif
  };
  
  struct nf_conn;
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
  
  static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
  {
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
         return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+#else
+       return NULL;
+#endif
  }
  
  #else  /* !__KERNEL__: iptables wants this to compile. */
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h

index 33602ab..3dc7b98 100644 (file)
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
                                      enum nf_nat_manip_type manip)
  {
         if (manip == IP_NAT_MANIP_SRC)
-               return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_SRC_NAT_DONE;
         else
-               return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_DST_NAT_DONE;
  }
  
  struct nlattr;
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h

index d4958d4..341eb08 100644 (file)
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -21,15 +21,15 @@ struct netns_ct {
         int                     sysctl_events;
         unsigned int            sysctl_events_retry_timeout;
         int                     sysctl_acct;
+       int                     sysctl_tstamp;
         int                     sysctl_checksum;
         unsigned int            sysctl_log_invalid; /* Log invalid packets */
  #ifdef CONFIG_SYSCTL
         struct ctl_table_header *sysctl_header;
         struct ctl_table_header *acct_sysctl_header;
+       struct ctl_table_header *tstamp_sysctl_header;
         struct ctl_table_header *event_sysctl_header;
  #endif
-       int                     hash_vmalloc;
-       int                     expect_vmalloc;
         char                    *slabname;
  };
  #endif
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h

new file mode 100644 (file)

index 0000000..259ebac
--- /dev/null
+++ b/include/net/netns/ip_vs.h
@@ -0,0 +1,143 @@
+/*
+ *  IP Virtual Server
+ *  Data structure for network namespace
+ *
+ */
+
+#ifndef IP_VS_H_
+#define IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+       int                     gen;            /* Generation */
+       /*
+        *      Hash table: for real service lookups
+        */
+       #define IP_VS_RTAB_BITS 4
+       #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+       #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+       struct list_head        rs_table[IP_VS_RTAB_SIZE];
+       /* ip_vs_app */
+       struct list_head        app_list;
+       struct mutex            app_mutex;
+       struct lock_class_key   app_key;        /* mutex debugging */
+
+       /* ip_vs_proto */
+       #define IP_VS_PROTO_TAB_SIZE    32      /* must be power of 2 */
+       struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+       /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+       #define TCP_APP_TAB_BITS        4
+       #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
+       #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
+       struct list_head        tcp_apps[TCP_APP_TAB_SIZE];
+       spinlock_t              tcp_app_lock;
+#endif
+       /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+       #define UDP_APP_TAB_BITS        4
+       #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
+       #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
+       struct list_head        udp_apps[UDP_APP_TAB_SIZE];
+       spinlock_t              udp_app_lock;
+#endif
+       /* ip_vs_proto_sctp */
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+       #define SCTP_APP_TAB_BITS       4
+       #define SCTP_APP_TAB_SIZE       (1 << SCTP_APP_TAB_BITS)
+       #define SCTP_APP_TAB_MASK       (SCTP_APP_TAB_SIZE - 1)
+       /* Hash table for SCTP application incarnations  */
+       struct list_head        sctp_apps[SCTP_APP_TAB_SIZE];
+       spinlock_t              sctp_app_lock;
+#endif
+       /* ip_vs_conn */
+       atomic_t                conn_count;      /*  connection counter */
+
+       /* ip_vs_ctl */
+       struct ip_vs_stats              *tot_stats;  /* Statistics & est. */
+       struct ip_vs_cpu_stats __percpu *cpustats;   /* Stats per cpu */
+       seqcount_t                      *ustats_seq; /* u64 read retry */
+
+       int                     num_services;    /* no of virtual services */
+       /* 1/rate drop and drop-entry variables */
+       struct delayed_work     defense_work;   /* Work handler */
+       int                     drop_rate;
+       int                     drop_counter;
+       atomic_t                dropentry;
+       /* locks in ctl.c */
+       spinlock_t              dropentry_lock;  /* drop entry handling */
+       spinlock_t              droppacket_lock; /* drop packet handling */
+       spinlock_t              securetcp_lock;  /* state and timeout tables */
+       rwlock_t                rs_lock;         /* real services table */
+       /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+       struct lock_class_key   ctl_key;        /* ctl_mutex debugging */
+       /* Trash for destinations */
+       struct list_head        dest_trash;
+       /* Service counters */
+       atomic_t                ftpsvc_counter;
+       atomic_t                nullsvc_counter;
+
+       /* sys-ctl struct */
+       struct ctl_table_header *sysctl_hdr;
+       struct ctl_table        *sysctl_tbl;
+       /* sysctl variables */
+       int                     sysctl_amemthresh;
+       int                     sysctl_am_droprate;
+       int                     sysctl_drop_entry;
+       int                     sysctl_drop_packet;
+       int                     sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+       int                     sysctl_conntrack;
+#endif
+       int                     sysctl_snat_reroute;
+       int                     sysctl_sync_ver;
+       int                     sysctl_cache_bypass;
+       int                     sysctl_expire_nodest_conn;
+       int                     sysctl_expire_quiescent_template;
+       int                     sysctl_sync_threshold[2];
+       int                     sysctl_nat_icmp_send;
+
+       /* ip_vs_lblc */
+       int                     sysctl_lblc_expiration;
+       struct ctl_table_header *lblc_ctl_header;
+       struct ctl_table        *lblc_ctl_table;
+       /* ip_vs_lblcr */
+       int                     sysctl_lblcr_expiration;
+       struct ctl_table_header *lblcr_ctl_header;
+       struct ctl_table        *lblcr_ctl_table;
+       /* ip_vs_est */
+       struct list_head        est_list;       /* estimator list */
+       spinlock_t              est_lock;
+       struct timer_list       est_timer;      /* Estimation timer */
+       /* ip_vs_sync */
+       struct list_head        sync_queue;
+       spinlock_t              sync_lock;
+       struct ip_vs_sync_buff  *sync_buff;
+       spinlock_t              sync_buff_lock;
+       struct sockaddr_in      sync_mcast_addr;
+       struct task_struct      *master_thread;
+       struct task_struct      *backup_thread;
+       int                     send_mesg_maxlen;
+       int                     recv_mesg_maxlen;
+       volatile int            sync_state;
+       volatile int            master_syncid;
+       volatile int            backup_syncid;
+       /* multicast interface name */
+       char                    master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+       char                    backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+       /* net name space ptr */
+       struct net              *net;            /* Needed by timer routines */
+};
+
+#endif /* IP_VS_H_ */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h

index d68c3f1..e2e2ef5 100644 (file)
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,7 +43,6 @@ struct netns_ipv4 {
         struct xt_table         *nat_table;
         struct hlist_head       *nat_bysource;
         unsigned int            nat_htable_size;
-       int                     nat_vmalloced;
  #endif
  
         int sysctl_icmp_echo_ignore_all;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h

index 160a407..16626a0 100644 (file)
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -31,10 +31,12 @@ enum qdisc_state_t {
   * following bits are only changed while qdisc lock is held
   */
  enum qdisc___state_t {
-       __QDISC___STATE_RUNNING,
+       __QDISC___STATE_RUNNING = 1,
+       __QDISC___STATE_THROTTLED = 2,
  };
  
  struct qdisc_size_table {
+       struct rcu_head         rcu;
         struct list_head        list;
         struct tc_sizespec      szopts;
         int                     refcnt;
@@ -46,14 +48,13 @@ struct Qdisc {
         struct sk_buff *        (*dequeue)(struct Qdisc *dev);
         unsigned                flags;
  #define TCQ_F_BUILTIN          1
-#define TCQ_F_THROTTLED                2
-#define TCQ_F_INGRESS          4
-#define TCQ_F_CAN_BYPASS       8
-#define TCQ_F_MQROOT           16
+#define TCQ_F_INGRESS          2
+#define TCQ_F_CAN_BYPASS       4
+#define TCQ_F_MQROOT           8
  #define TCQ_F_WARN_NONWC       (1 << 16)
         int                     padded;
         struct Qdisc_ops        *ops;
-       struct qdisc_size_table *stab;
+       struct qdisc_size_table __rcu *stab;
         struct list_head        list;
         u32                     handle;
         u32                     parent;
@@ -78,25 +79,43 @@ struct Qdisc {
         unsigned long           state;
         struct sk_buff_head     q;
         struct gnet_stats_basic_packed bstats;
-       unsigned long           __state;
+       unsigned int            __state;
         struct gnet_stats_queue qstats;
         struct rcu_head         rcu_head;
         spinlock_t              busylock;
  };
  
-static inline bool qdisc_is_running(struct Qdisc *qdisc)
+static inline bool qdisc_is_running(const struct Qdisc *qdisc)
  {
-       return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+       return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false;
  }
  
  static inline bool qdisc_run_begin(struct Qdisc *qdisc)
  {
-       return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+       if (qdisc_is_running(qdisc))
+               return false;
+       qdisc->__state |= __QDISC___STATE_RUNNING;
+       return true;
  }
  
  static inline void qdisc_run_end(struct Qdisc *qdisc)
  {
-       __clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+       qdisc->__state &= ~__QDISC___STATE_RUNNING;
+}
+
+static inline bool qdisc_is_throttled(const struct Qdisc *qdisc)
+{
+       return (qdisc->__state & __QDISC___STATE_THROTTLED) ? true : false;
+}
+
+static inline void qdisc_throttled(struct Qdisc *qdisc)
+{
+       qdisc->__state |= __QDISC___STATE_THROTTLED;
+}
+
+static inline void qdisc_unthrottled(struct Qdisc *qdisc)
+{
+       qdisc->__state &= ~__QDISC___STATE_THROTTLED;
  }
  
  struct Qdisc_class_ops {
@@ -331,8 +350,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                                  struct Qdisc_ops *ops);
  extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
                                        struct Qdisc_ops *ops, u32 parentid);
-extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
-                                  struct qdisc_size_table *stab);
+extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+                                     const struct qdisc_size_table *stab);
  extern void tcf_destroy(struct tcf_proto *tp);
  extern void tcf_destroy_chain(struct tcf_proto **fl);
  
@@ -411,12 +430,20 @@ enum net_xmit_qdisc_t {
  #define net_xmit_drop_count(e) (1)
  #endif
  
-static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
+                                          const struct Qdisc *sch)
  {
  #ifdef CONFIG_NET_SCHED
-       if (sch->stab)
-               qdisc_calculate_pkt_len(skb, sch->stab);
+       struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);
+
+       if (stab)
+               __qdisc_calculate_pkt_len(skb, stab);
  #endif
+}
+
+static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+       qdisc_calculate_pkt_len(skb, sch);
         return sch->enqueue(skb, sch);
  }
  
diff --git a/include/net/sock.h b/include/net/sock.h

index d884d26..ba6465b 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1189,7 +1189,7 @@ extern void sk_filter_release_rcu(struct rcu_head *rcu);
  static inline void sk_filter_release(struct sk_filter *fp)
  {
         if (atomic_dec_and_test(&fp->refcnt))
-               call_rcu_bh(&fp->rcu, sk_filter_release_rcu);
+               call_rcu(&fp->rcu, sk_filter_release_rcu);
  }
  
  static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
diff --git a/kernel/audit.c b/kernel/audit.c

index e495624..162e88e 100644 (file)
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -74,6 +74,8 @@ static int    audit_initialized;
  int            audit_enabled;
  int            audit_ever_enabled;
  
+EXPORT_SYMBOL_GPL(audit_enabled);
+
  /* Default state when kernel boots without any parameters. */
  static int     audit_default;
  
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c

index 17c5ba7..29a54cc 100644 (file)
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -59,7 +59,6 @@
                                                  * safely advertise a maxsize
                                                  * of 64k */
  
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
  /**
   * struct p9_trans_rdma - RDMA transport instance
   *
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c

index 50a46af..2ed0056 100644 (file)
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -22,9 +22,15 @@
  #include <linux/netfilter_bridge/ebtables.h>
  #include <linux/netfilter_bridge/ebt_ip6.h>
  
-struct tcpudphdr {
-       __be16 src;
-       __be16 dst;
+union pkthdr {
+       struct {
+               __be16 src;
+               __be16 dst;
+       } tcpudphdr;
+       struct {
+               u8 type;
+               u8 code;
+       } icmphdr;
  };
  
  static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
         const struct ebt_ip6_info *info = par->matchinfo;
         const struct ipv6hdr *ih6;
         struct ipv6hdr _ip6h;
-       const struct tcpudphdr *pptr;
-       struct tcpudphdr _ports;
+       const union pkthdr *pptr;
+       union pkthdr _pkthdr;
  
         ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
         if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
                         return false;
                 if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
                         return false;
-               if (!(info->bitmask & EBT_IP6_DPORT) &&
-                   !(info->bitmask & EBT_IP6_SPORT))
+               if (!(info->bitmask & ( EBT_IP6_DPORT |
+                                       EBT_IP6_SPORT | EBT_IP6_ICMP6)))
                         return true;
-               pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
-                                         &_ports);
+
+               /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
+               pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+                                         &_pkthdr);
                 if (pptr == NULL)
                         return false;
                 if (info->bitmask & EBT_IP6_DPORT) {
-                       u32 dst = ntohs(pptr->dst);
+                       u16 dst = ntohs(pptr->tcpudphdr.dst);
                         if (FWINV(dst < info->dport[0] ||
                                   dst > info->dport[1], EBT_IP6_DPORT))
                                 return false;
                 }
                 if (info->bitmask & EBT_IP6_SPORT) {
-                       u32 src = ntohs(pptr->src);
+                       u16 src = ntohs(pptr->tcpudphdr.src);
                         if (FWINV(src < info->sport[0] ||
                                   src > info->sport[1], EBT_IP6_SPORT))
                         return false;
                 }
-               return true;
+               if ((info->bitmask & EBT_IP6_ICMP6) &&
+                    FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+                          pptr->icmphdr.type > info->icmpv6_type[1] ||
+                          pptr->icmphdr.code < info->icmpv6_code[0] ||
+                          pptr->icmphdr.code > info->icmpv6_code[1],
+                                                       EBT_IP6_ICMP6))
+                       return false;
         }
         return true;
  }
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
                 return -EINVAL;
         if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
                 return -EINVAL;
+       if (info->bitmask & EBT_IP6_ICMP6) {
+               if ((info->invflags & EBT_IP6_PROTO) ||
+                    info->protocol != IPPROTO_ICMPV6)
+                       return -EINVAL;
+               if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+                   info->icmpv6_code[0] > info->icmpv6_code[1])
+                       return -EINVAL;
+       }
         return 0;
  }
  
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c

index 16df053..5f1825d 100644 (file)
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
  
         newinfo->entries_size = size;
  
+       xt_compat_init_offsets(AF_INET, info->nentries);
         return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
                                                         entries, newinfo);
  }
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c

index c665de7..f1f98d9 100644 (file)
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -23,10 +23,8 @@
  #include <asm/atomic.h>
  
  #define MAX_PHY_LAYERS 7
-#define PHY_NAME_LEN 20
  
  #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-#define RFM_FRAGMENT_SIZE 4030
  
  /* Information about CAIF physical interfaces held by Config Module in order
   * to manage physical interfaces
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c

index d3ed264..27dab26 100644 (file)
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -18,7 +18,6 @@
  #define DGM_CMD_BIT  0x80
  #define DGM_FLOW_OFF 0x81
  #define DGM_FLOW_ON  0x80
-#define DGM_CTRL_PKT_SIZE 1
  #define DGM_MTU 1500
  
  static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c

index 9297f7d..8303fe3 100644 (file)
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -25,7 +25,6 @@ struct cfserl {
         spinlock_t sync;
         bool usestx;
  };
-#define STXLEN(layr) (layr->usestx ? 1 : 0)
  
  static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
  static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c

index efad410..315c0d6 100644 (file)
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -20,7 +20,7 @@
  #define UTIL_REMOTE_SHUTDOWN 0x82
  #define UTIL_FLOW_OFF 0x81
  #define UTIL_FLOW_ON  0x80
-#define UTIL_CTRL_PKT_SIZE 1
+
  static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
  static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
  
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c

index 3b425b1..c3b1dec 100644 (file)
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -17,7 +17,7 @@
  #define VEI_FLOW_OFF 0x81
  #define VEI_FLOW_ON  0x80
  #define VEI_SET_PIN  0x82
-#define VEI_CTRL_PKT_SIZE 1
+
  #define container_obj(layr) container_of(layr, struct cfsrvl, layer)
  
  static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
diff --git a/net/core/dev.c b/net/core/dev.c

index 24ea2d7..d162ba8 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1286,7 +1286,7 @@ static int __dev_close(struct net_device *dev)
         return __dev_close_many(&single);
  }
  
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
  {
         struct net_device *dev, *tmp;
         LIST_HEAD(tmp_list);
@@ -1594,6 +1594,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
         rcu_read_unlock();
  }
  
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this, verify the tc mapping remains valid and, if
+ * not, NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid and nothing can be done, so disable priority mappings. It
+ * is expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+       int i;
+       struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+       /* If TC0 is invalidated disable TC mapping */
+       if (tc->offset + tc->count > txq) {
+               pr_warning("Number of in use tx queues changed "
+                          "invalidating tc mappings. Priority "
+                          "traffic classification disabled!\n");
+               dev->num_tc = 0;
+               return;
+       }
+
+       /* Invalidated prio to tc mappings set to TC0 */
+       for (i = 1; i < TC_BITMASK + 1; i++) {
+               int q = netdev_get_prio_tc_map(dev, i);
+
+               tc = &dev->tc_to_txq[q];
+               if (tc->offset + tc->count > txq) {
+                       pr_warning("Number of in use tx queues "
+                                  "changed. Priority %i to tc "
+                                  "mapping %i is no longer valid "
+                                  "setting map to 0\n",
+                                  i, q);
+                       netdev_set_prio_tc_map(dev, i, 0);
+               }
+       }
+}
+
  /*
   * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
   * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1613,6 +1655,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
                 if (rc)
                         return rc;
  
+               if (dev->num_tc)
+                       netif_setup_tc(dev, txq);
+
                 if (txq < dev->real_num_tx_queues)
                         qdisc_reset_all_tx_gt(dev, txq);
         }
@@ -2162,6 +2207,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
                   unsigned int num_tx_queues)
  {
         u32 hash;
+       u16 qoffset = 0;
+       u16 qcount = num_tx_queues;
  
         if (skb_rx_queue_recorded(skb)) {
                 hash = skb_get_rx_queue(skb);
@@ -2170,13 +2217,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
                 return hash;
         }
  
+       if (dev->num_tc) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+               qoffset = dev->tc_to_txq[tc].offset;
+               qcount = dev->tc_to_txq[tc].count;
+       }
+
         if (skb->sk && skb->sk->sk_hash)
                 hash = skb->sk->sk_hash;
         else
                 hash = (__force u16) skb->protocol ^ skb->rxhash;
         hash = jhash_1word(hash, hashrnd);
  
-       return (u16) (((u64) hash * num_tx_queues) >> 32);
+       return (u16) (((u64) hash * qcount) >> 32) + qoffset;
  }
  EXPORT_SYMBOL(__skb_tx_hash);
  
@@ -2273,15 +2326,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                                  struct netdev_queue *txq)
  {
         spinlock_t *root_lock = qdisc_lock(q);
-       bool contended = qdisc_is_running(q);
+       bool contended;
         int rc;
  
+       qdisc_skb_cb(skb)->pkt_len = skb->len;
+       qdisc_calculate_pkt_len(skb, q);
         /*
          * Heuristic to force contended enqueues to serialize on a
          * separate lock before trying to get qdisc main lock.
          * This permits __QDISC_STATE_RUNNING owner to get the lock more often
          * and dequeue packets faster.
          */
+       contended = qdisc_is_running(q);
         if (unlikely(contended))
                 spin_lock(&q->busylock);
  
@@ -2299,7 +2355,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
                         skb_dst_force(skb);
  
-               qdisc_skb_cb(skb)->pkt_len = skb->len;
                 qdisc_bstats_update(q, skb);
  
                 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2314,7 +2369,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                 rc = NET_XMIT_SUCCESS;
         } else {
                 skb_dst_force(skb);
-               rc = qdisc_enqueue_root(skb, q);
+               rc = q->enqueue(skb, q) & NET_XMIT_MASK;
                 if (qdisc_run_begin(q)) {
                         if (unlikely(contended)) {
                                 spin_unlock(&q->busylock);
@@ -4572,6 +4627,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
  }
  EXPORT_SYMBOL(dev_set_mtu);
  
+/**
+ *     dev_set_group - Change group this device belongs to
+ *     @dev: device
+ *     @new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+       dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
  /**
   *     dev_set_mac_address - Change Media Access Control Address
   *     @dev: device
@@ -5679,6 +5745,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
         dev->priv_flags = IFF_XMIT_DST_RELEASE;
         setup(dev);
         strcpy(dev->name, name);
+       dev->group = INIT_NETDEV_GROUP;
         return dev;
  
  free_pcpu:
diff --git a/net/core/filter.c b/net/core/filter.c

index afc5837..232b187 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
         if (err)
                 return err;
  
-       rcu_read_lock_bh();
-       filter = rcu_dereference_bh(sk->sk_filter);
+       rcu_read_lock();
+       filter = rcu_dereference(sk->sk_filter);
         if (filter) {
                 unsigned int pkt_len = sk_run_filter(skb, filter->insns);
  
                 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
         }
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
  
         return err;
  }
diff --git a/net/core/neighbour.c b/net/core/neighbour.c

index 60a9029..799f06e 100644 (file)
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
  {
         size_t size = entries * sizeof(struct neighbour *);
         struct neigh_hash_table *ret;
-       struct neighbour **buckets;
+       struct neighbour __rcu **buckets;
  
         ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
         if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
         if (size <= PAGE_SIZE)
                 buckets = kzalloc(size, GFP_ATOMIC);
         else
-               buckets = (struct neighbour **)
+               buckets = (struct neighbour __rcu **)
                           __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                            get_order(size));
         if (!buckets) {
                 kfree(ret);
                 return NULL;
         }
-       rcu_assign_pointer(ret->hash_buckets, buckets);
+       ret->hash_buckets = buckets;
         ret->hash_mask = entries - 1;
         get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
         return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
                                                     struct neigh_hash_table,
                                                     rcu);
         size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
-       struct neighbour **buckets = nht->hash_buckets;
+       struct neighbour __rcu **buckets = nht->hash_buckets;
  
         if (size <= PAGE_SIZE)
                 kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
                 panic("cannot create neighbour proc dir entry");
  #endif
  
-       tbl->nht = neigh_hash_alloc(8);
+       RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
  
         phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
         tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
         }
         write_unlock(&neigh_tbl_lock);
  
-       call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+       call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+                neigh_hash_free_rcu);
         tbl->nht = NULL;
  
         kfree(tbl->phash_buckets);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c

index 750db57..c668f8c 100644 (file)
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
                    netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
         NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
         NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+       NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
  
         if (dev->ifindex != dev->iflink)
                 NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1265,6 +1266,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
                 modified = 1;
         }
  
+       if (tb[IFLA_GROUP]) {
+               dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+               modified = 1;
+       }
+
         /*
          * Interface selected by interface index but interface
          * name provided implies that a name change has been
@@ -1542,6 +1548,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
                 set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
         if (tb[IFLA_LINKMODE])
                 dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+       if (tb[IFLA_GROUP])
+               dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
  
         return dev;
  
@@ -1552,6 +1560,24 @@ err:
  }
  EXPORT_SYMBOL(rtnl_create_link);
  
+static int rtnl_group_changelink(struct net *net, int group,
+               struct ifinfomsg *ifm,
+               struct nlattr **tb)
+{
+       struct net_device *dev;
+       int err;
+
+       for_each_netdev(net, dev) {
+               if (dev->group == group) {
+                       err = do_setlink(dev, ifm, tb, NULL, 0);
+                       if (err < 0)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
  {
         struct net *net = sock_net(skb->sk);
@@ -1579,10 +1605,12 @@ replay:
         ifm = nlmsg_data(nlh);
         if (ifm->ifi_index > 0)
                 dev = __dev_get_by_index(net, ifm->ifi_index);
-       else if (ifname[0])
-               dev = __dev_get_by_name(net, ifname);
-       else
-               dev = NULL;
+       else {
+               if (ifname[0])
+                       dev = __dev_get_by_name(net, ifname);
+               else
+                       dev = NULL;
+       }
  
         err = validate_linkmsg(dev, tb);
         if (err < 0)
@@ -1646,8 +1674,13 @@ replay:
                         return do_setlink(dev, ifm, tb, ifname, modified);
                 }
  
-               if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+               if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+                       if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+                               return rtnl_group_changelink(net,
+                                               nla_get_u32(tb[IFLA_GROUP]),
+                                               ifm, tb);
                         return -ENODEV;
+               }
  
                 if (ifm->ifi_index)
                         return -EOPNOTSUPP;
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c

index f2abd37..b66600b 100644 (file)
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -59,7 +59,6 @@ struct dn_hash
  };
  
  #define dz_key_0(key)          ((key).datum = 0)
-#define dz_prefix(key,dz)      ((key).datum)
  
  #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
         for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig

index a5a1050..8949a05 100644 (file)
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE
           handled by the klogd daemon which is responsible for kernel messages
           ("man klogd").
  
+config IP_ROUTE_CLASSID
+       bool
+
  config IP_PNP
         bool "IP: kernel level autoconfiguration"
         help
@@ -657,4 +660,3 @@ config TCP_MD5SIG
           on the Internet.
  
           If unsure, say N.
-
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c

index 7981a24..9cefe72 100644 (file)
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -41,12 +41,12 @@ struct fib4_rule {
         __be32                  srcmask;
         __be32                  dst;
         __be32                  dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         u32                     tclassid;
  #endif
  };
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  u32 fib_rules_tclass(struct fib_result *res)
  {
         return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
         if (frh->dst_len)
                 rule4->dst = nla_get_be32(tb[FRA_DST]);
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         if (tb[FRA_FLOW])
                 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
  #endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
         if (frh->tos && (rule4->tos != frh->tos))
                 return 0;
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
                 return 0;
  #endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
         if (rule4->src_len)
                 NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         if (rule4->tclassid)
                 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
  #endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c

index 12d3dc3..9aff11d 100644 (file)
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
  #ifdef CONFIG_IP_ROUTE_MULTIPATH
                     nh->nh_weight != onh->nh_weight ||
  #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                     nh->nh_tclassid != onh->nh_tclassid ||
  #endif
                     ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
  
                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
                         nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                         nla = nla_find(attrs, attrlen, RTA_FLOW);
                         nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
  #endif
@@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
                         if (nla && nla_get_be32(nla) != nh->nh_gw)
                                 return 1;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                         nla = nla_find(attrs, attrlen, RTA_FLOW);
                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
                                 return 1;
@@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
                         goto err_inval;
                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
                         goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
                         goto err_inval;
  #endif
@@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
                 nh->nh_oif = cfg->fc_oif;
                 nh->nh_gw = cfg->fc_gw;
                 nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                 nh->nh_tclassid = cfg->fc_flow;
  #endif
  #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
  
                 if (fi->fib_nh->nh_oif)
                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                 if (fi->fib_nh[0].nh_tclassid)
                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
  #endif
@@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
  
                         if (nh->nh_gw)
                                 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                         if (nh->nh_tclassid)
                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
  #endif
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c

index d859bcc..d7b2b09 100644 (file)
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
                 }
         }
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         if (unlikely(skb_dst(skb)->tclassid)) {
                 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
                 u32 idx = skb_dst(skb)->tclassid;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig

index babd1a2..f926a31 100644 (file)
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
  
  config NF_NAT_SNMP_BASIC
         tristate "Basic SNMP-ALG support"
-       depends on NF_NAT
+       depends on NF_CONNTRACK_SNMP && NF_NAT
         depends on NETFILTER_ADVANCED
+       default NF_NAT && NF_CONNTRACK_SNMP
         ---help---
  
           This module implements an Application Layer Gateway (ALG) for
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c

index e855fff..e95054c 100644 (file)
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
         newinfo->initial_entries = 0;
         loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(NFPROTO_ARP, info->number);
         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                 if (ret != 0)
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
         duprintf("translate_compat_table: size %u\n", info->size);
         j = 0;
         xt_compat_lock(NFPROTO_ARP);
+       xt_compat_init_offsets(NFPROTO_ARP, number);
         /* Walk through entries, checking offsets. */
         xt_entry_foreach(iter0, entry0, total_size) {
                 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c

index 652efea..ef7d7b9 100644 (file)
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
         newinfo->initial_entries = 0;
         loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(AF_INET, info->number);
         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                 if (ret != 0)
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
         duprintf("translate_compat_table: size %u\n", info->size);
         j = 0;
         xt_compat_lock(AF_INET);
+       xt_compat_init_offsets(AF_INET, number);
         /* Walk through entries, checking offsets. */
         xt_entry_foreach(iter0, entry0, total_size) {
                 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c

index 1e26a48..403ca57 100644 (file)
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
          * that the ->target() function isn't called after ->destroy() */
  
         ct = nf_ct_get(skb, &ctinfo);
-       if (ct == NULL) {
-               pr_info("no conntrack!\n");
-                       /* FIXME: need to drop invalid ones, since replies
-                        * to outgoing connections of other nodes will be
-                        * marked as INVALID */
+       if (ct == NULL)
                 return NF_DROP;
-       }
  
         /* special case: ICMP error handling. conntrack distinguishes between
          * error messages (RELATED) and information requests (see below) */
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c

index 72ffc8f..d76d6c9 100644 (file)
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
         }
  #endif
  
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                 dump_mac_header(m, loginfo, skb);
  
         dump_packet(m, loginfo, skb, 0);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c

index 294a2a3..aef5d1f 100644 (file)
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
         ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
                            dev_net(out)->ipv4.iptable_mangle);
         /* Reroute for ANY change. */
-       if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+       if (ret != NF_DROP && ret != NF_STOLEN) {
                 iph = ip_hdr(skb);
  
                 if (iph->saddr != saddr ||
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c

index 63f60fc..5585980 100644 (file)
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -20,6 +20,7 @@
  #include <net/netfilter/nf_conntrack_l4proto.h>
  #include <net/netfilter/nf_conntrack_expect.h>
  #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
  
  struct ct_iter_state {
         struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
         for (st->bucket = 0;
              st->bucket < net->ct.htable_size;
              st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                 if (!is_a_nulls(n))
                         return n;
         }
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
         struct net *net = seq_file_net(seq);
         struct ct_iter_state *st = seq->private;
  
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
         while (is_a_nulls(head)) {
                 if (likely(get_nulls_value(head) == st->bucket)) {
                         if (++st->bucket >= net->ct.htable_size)
                                 return NULL;
                 }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
         }
         return head;
  }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
         struct hlist_node *n;
  
         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                 if (n)
                         return n;
         }
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
         struct net *net = seq_file_net(seq);
         struct ct_expect_iter_state *st = seq->private;
  
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
         while (head == NULL) {
                 if (++st->bucket >= nf_ct_expect_hsize)
                         return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
         }
         return head;
  }
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c

index 0f23b3f..703f366 100644 (file)
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
  
         /* Try to get same port: if not, try to change it. */
         for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-               int ret;
+               int res;
  
                 exp->tuple.dst.u.tcp.port = htons(port);
-               ret = nf_ct_expect_related(exp);
-               if (ret == 0)
+               res = nf_ct_expect_related(exp);
+               if (res == 0)
                         break;
-               else if (ret != -EBUSY) {
+               else if (res != -EBUSY) {
                         port = 0;
                         break;
                 }
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c

index c04787c..21bcf47 100644 (file)
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
            manips not an issue.  */
         if (maniptype == IP_NAT_MANIP_SRC &&
             !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-               if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
+               /* try the original tuple first */
+               if (in_range(orig_tuple, range)) {
+                       if (!nf_nat_used_tuple(orig_tuple, ct)) {
+                               *tuple = *orig_tuple;
+                               return;
+                       }
+               } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
+                          range)) {
                         pr_debug("get_unique_tuple: Found current src map\n");
                         if (!nf_nat_used_tuple(tuple, ct))
                                 return;
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct,
         struct net *net = nf_ct_net(ct);
         struct nf_conntrack_tuple curr_tuple, new_tuple;
         struct nf_conn_nat *nat;
-       int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
  
         /* nat helper or nfctnetlink also setup binding */
         nat = nfct_nat(ct);
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct,
                         ct->status |= IPS_DST_NAT;
         }
  
-       /* Place in source hash if this is the first time. */
-       if (have_to_hash) {
+       if (maniptype == IP_NAT_MANIP_SRC) {
                 unsigned int srchash;
  
                 srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct,
  
         /* It's done. */
         if (maniptype == IP_NAT_MANIP_DST)
-               set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_DST_NAT_DONE;
         else
-               set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_SRC_NAT_DONE;
  
         return NF_ACCEPT;
  }
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
         int ret = 0;
  
         spin_lock_bh(&nf_nat_lock);
-       if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+       if (rcu_dereference_protected(
+                       nf_nat_protos[proto->protonum],
+                       lockdep_is_held(&nf_nat_lock)
+                       ) != &nf_nat_unknown_protocol) {
                 ret = -EBUSY;
                 goto out;
         }
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
         if (nat == NULL || nat->ct == NULL)
                 return;
  
-       NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+       NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
  
         spin_lock_bh(&nf_nat_lock);
         hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old)
         struct nf_conn_nat *old_nat = old;
         struct nf_conn *ct = old_nat->ct;
  
-       if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
+       if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
                 return;
  
         spin_lock_bh(&nf_nat_lock);
-       new_nat->ct = ct;
         hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
         spin_unlock_bh(&nf_nat_lock);
  }
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net)
  {
         /* Leave them the same for the moment. */
         net->ipv4.nat_htable_size = net->ct.htable_size;
-       net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
-                                                      &net->ipv4.nat_vmalloced, 0);
+       net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
         if (!net->ipv4.nat_bysource)
                 return -ENOMEM;
         return 0;
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
  {
         nf_ct_iterate_cleanup(net, &clean_nat, NULL);
         synchronize_rcu();
-       nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-                            net->ipv4.nat_htable_size);
+       nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
  }
  
  static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c

index ee5f419..8812a02 100644 (file)
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -54,6 +54,7 @@
  #include <net/netfilter/nf_conntrack_expect.h>
  #include <net/netfilter/nf_conntrack_helper.h>
  #include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
  {
         int ret = 0;
  
-       ret = nf_conntrack_helper_register(&snmp_helper);
-       if (ret < 0)
-               return ret;
+       BUG_ON(nf_nat_snmp_hook != NULL);
+       rcu_assign_pointer(nf_nat_snmp_hook, help);
+
         ret = nf_conntrack_helper_register(&snmp_trap_helper);
         if (ret < 0) {
                 nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
  
  static void __exit nf_nat_snmp_basic_fini(void)
  {
-       nf_conntrack_helper_unregister(&snmp_helper);
+       rcu_assign_pointer(nf_nat_snmp_hook, NULL);
         nf_conntrack_helper_unregister(&snmp_trap_helper);
  }
  
diff --git a/net/ipv4/route.c b/net/ipv4/route.c

index 351dc4e..3e5b7cc 100644 (file)
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = {
         .release = seq_release,
  };
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  static int rt_acct_proc_show(struct seq_file *m, void *v)
  {
         struct ip_rt_acct *dst, *src;
@@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
         if (!pde)
                 goto err2;
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
         if (!pde)
                 goto err3;
  #endif
         return 0;
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  err3:
         remove_proc_entry("rt_cache", net->proc_net_stat);
  #endif
@@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
  {
         remove_proc_entry("rt_cache", net->proc_net_stat);
         remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         remove_proc_entry("rt_acct", net->proc_net);
  #endif
  }
@@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
         memcpy(addr, &src, 4);
  }
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  static void set_class_tag(struct rtable *rt, u32 tag)
  {
         if (!(rt->dst.tclassid & 0xFFFF))
@@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
                     FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
                         rt->rt_gateway = FIB_RES_GW(*res);
                 dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                 dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
  #endif
         }
@@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
         if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
                 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  #ifdef CONFIG_IP_MULTIPLE_TABLES
         set_class_tag(rt, fib_rules_tclass(res));
  #endif
@@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         rth->fl.mark    = skb->mark;
         rth->fl.fl4_src = saddr;
         rth->rt_src     = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         rth->dst.tclassid = itag;
  #endif
         rth->rt_iif     =
@@ -2208,7 +2208,7 @@ local_input:
         rth->fl.mark    = skb->mark;
         rth->fl.fl4_src = saddr;
         rth->rt_src     = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         rth->dst.tclassid = itag;
  #endif
         rth->rt_iif     =
@@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net,
         }
         if (rt->dst.dev)
                 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         if (rt->dst.tclassid)
                 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
  #endif
@@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
  };
  
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
  struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
  
  static __initdata unsigned long rhash_entries;
  static int __init set_rhash_entries(char *str)
@@ -3267,7 +3267,7 @@ int __init ip_rt_init(void)
  {
         int rc = 0;
  
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
         if (!ip_rt_acct)
                 panic("IP: failed to allocate ip_rt_acct\n");
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c

index 7d227c6..47b7b8d 100644 (file)
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
         newinfo->initial_entries = 0;
         loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(AF_INET6, info->number);
         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                 if (ret != 0)
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
         duprintf("translate_compat_table: size %u\n", info->size);
         j = 0;
         xt_compat_lock(AF_INET6);
+       xt_compat_init_offsets(AF_INET6, number);
         /* Walk through entries, checking offsets. */
         xt_entry_foreach(iter0, entry0, total_size) {
                 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c

index 09c8889..05027b7 100644 (file)
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
                in ? in->name : "",
                out ? out->name : "");
  
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                 dump_mac_header(m, loginfo, skb);
  
         dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c

index 79d43aa..0857272 100644 (file)
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -45,6 +45,7 @@
  #include <linux/netfilter_ipv6.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
  
  
  struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags;
  static struct netns_frags nf_init_frags;
  
  #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
         {
                 .procname       = "nf_conntrack_frag6_timeout",
                 .data           = &nf_init_frags.timeout,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c

index 86c3952..2bc6cd7 100644 (file)
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -123,18 +123,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
  }
  
  #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
  
-int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
-                                          struct sk_buff *skb))
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
  {
         rcu_assign_pointer(mh_filter, filter);
         return 0;
  }
  EXPORT_SYMBOL(rawv6_mh_filter_register);
  
-int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
-                                            struct sk_buff *skb))
+int rawv6_mh_filter_unregister(mh_filter_t filter)
  {
         rcu_assign_pointer(mh_filter, NULL);
         synchronize_rcu();
@@ -192,10 +192,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
                          * policy is placed in rawv6_rcv() because it is
                          * required for each socket.
                          */
-                       int (*filter)(struct sock *sock, struct sk_buff *skb);
+                       mh_filter_t *filter;
  
                         filter = rcu_dereference(mh_filter);
-                       filtered = filter ? filter(sk, skb) : 0;
+                       filtered = filter ? (*filter)(sk, skb) : 0;
                         break;
                 }
  #endif
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c

index 8ce38f1..b1599a3 100644 (file)
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
  
         p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
         do {
-               n = p->next;
+               n = rcu_dereference_protected(p->next, 1);
                 kfree(p);
                 p = n;
         } while (p);
@@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
  static int
  ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
  {
-       struct ip_tunnel_prl_entry *x, **p;
+       struct ip_tunnel_prl_entry *x;
+       struct ip_tunnel_prl_entry __rcu **p;
         int err = 0;
  
         ASSERT_RTNL();
  
         if (a && a->addr != htonl(INADDR_ANY)) {
-               for (p = &t->prl; *p; p = &(*p)->next) {
-                       if ((*p)->addr == a->addr) {
-                               x = *p;
+               for (p = &t->prl;
+                    (x = rtnl_dereference(*p)) != NULL;
+                    p = &x->next) {
+                       if (x->addr == a->addr) {
                                 *p = x->next;
                                 call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
                                 t->prl_count--;
@@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
                 }
                 err = -ENXIO;
         } else {
-               if (t->prl) {
+               x = rtnl_dereference(t->prl);
+               if (x) {
                         t->prl_count = 0;
-                       x = t->prl;
                         call_rcu(&x->rcu_head, prl_list_destroy_rcu);
                         t->prl = NULL;
                 }
@@ -1179,7 +1181,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
         if (!dev->tstats)
                 return -ENOMEM;
         dev_hold(dev);
-       sitn->tunnels_wc[0]     = tunnel;
+       rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
         return 0;
  }
  
@@ -1196,11 +1198,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
         for (prio = 1; prio < 4; prio++) {
                 int h;
                 for (h = 0; h < HASH_SIZE; h++) {
-                       struct ip_tunnel *t = sitn->tunnels[prio][h];
+                       struct ip_tunnel *t;
  
+                       t = rtnl_dereference(sitn->tunnels[prio][h]);
                         while (t != NULL) {
                                 unregister_netdevice_queue(t->dev, head);
-                               t = t->next;
+                               t = rtnl_dereference(t->next);
                         }
                 }
         }
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig

index 1534f2b..faf7412 100644 (file)
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
  
           If unsure, say `N'.
  
+config NF_CONNTRACK_TIMESTAMP
+       bool  'Connection tracking timestamping'
+       depends on NETFILTER_ADVANCED
+       help
+         This option enables support for connection tracking timestamping.
+         This allows you to store the flow start-time and to obtain
+         the flow-stop time (once it has been destroyed) via Connection
+         tracking events.
+
+         If unsure, say `N'.
+
  config NF_CT_PROTO_DCCP
         tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
         depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
  
           To compile it as a module, choose M here.  If unsure, say N.
  
+config NF_CONNTRACK_BROADCAST
+       tristate
+
  config NF_CONNTRACK_NETBIOS_NS
         tristate "NetBIOS name service protocol support"
         depends on NETFILTER_ADVANCED
+       select NF_CONNTRACK_BROADCAST
         help
           NetBIOS name service requests are sent as broadcast messages from an
           unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
  
           To compile it as a module, choose M here.  If unsure, say N.
  
+config NF_CONNTRACK_SNMP
+       tristate "SNMP service protocol support"
+       depends on NETFILTER_ADVANCED
+       select NF_CONNTRACK_BROADCAST
+       help
+         SNMP service requests are sent as broadcast messages from an
+         unprivileged port and responded to with unicast messages to the
+         same port. This make them hard to firewall properly because connection
+         tracking doesn't deal with broadcasts. This helper tracks locally
+         originating SNMP service requests and the corresponding
+         responses. It relies on correct IP address configuration, specifically
+         netmask and broadcast address.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
  config NF_CONNTRACK_PPTP
         tristate "PPtP protocol support"
         depends on NETFILTER_ADVANCED
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK
  
  comment "Xtables targets"
  
+config NETFILTER_XT_TARGET_AUDIT
+       tristate "AUDIT target support"
+       depends on AUDIT
+       depends on NETFILTER_ADVANCED
+       ---help---
+         This option adds an 'AUDIT' target, which can be used to create
+         audit records for packets dropped/accepted.
+
+         To compile it as a module, choose M here. If unsure, say N.
+
  config NETFILTER_XT_TARGET_CHECKSUM
         tristate "CHECKSUM target support"
         depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG
  config NETFILTER_XT_TARGET_NFQUEUE
         tristate '"NFQUEUE" target Support'
         depends on NETFILTER_ADVANCED
+       select NETFILTER_NETLINK_QUEUE
         help
           This target replaced the old obsolete QUEUE target.
  
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST
  config NETFILTER_XT_MATCH_REALM
         tristate  '"realm" match support'
         depends on NETFILTER_ADVANCED
-       select NET_CLS_ROUTE
+       select IP_ROUTE_CLASSID
         help
           This option adds a `realm' match, which allows you to use the realm
           key from the routing subsystem inside iptables.
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile

index 441050f..9ae6878 100644 (file)
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -1,6 +1,7 @@
  netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
  
  nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
  nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
  
  obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
  obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
  obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
  obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
  obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
  obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
  obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
  obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
  obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
  
  # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c

index 32fcbe2..1e00bf7 100644 (file)
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -175,13 +175,21 @@ next_hook:
                 ret = 1;
         } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
                 kfree_skb(skb);
-               ret = -(verdict >> NF_VERDICT_BITS);
+               ret = NF_DROP_GETERR(verdict);
                 if (ret == 0)
                         ret = -EPERM;
         } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-                             verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+                              verdict >> NF_VERDICT_QBITS);
+               if (ret < 0) {
+                       if (ret == -ECANCELED)
+                               goto next_hook;
+                       if (ret == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
+               ret = 0;
         }
         rcu_read_unlock();
         return ret;
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable);
  /* This does not belong here, but locally generated errors need it if connection
     tracking in use: without this, connection may not be in hash table, and hence
     manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
  EXPORT_SYMBOL(ip_ct_attach);
  
  void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
  }
  EXPORT_SYMBOL(nf_ct_attach);
  
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
  EXPORT_SYMBOL(nf_ct_destroy);
  
  void nf_conntrack_destroy(struct nf_conntrack *nfct)
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c

index a475ede..5c48ffb 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
  EXPORT_SYMBOL(unregister_ip_vs_app);
  EXPORT_SYMBOL(register_ip_vs_app_inc);
  
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
  /*
   *     Get an ip_vs_app object
   */
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
   *     Allocate/initialize app incarnation and register it in proto apps.
   */
  static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+                 __u16 port)
  {
         struct ip_vs_protocol *pp;
         struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
                 }
         }
  
-       ret = pp->register_app(inc);
+       ret = pp->register_app(net, inc);
         if (ret)
                 goto out;
  
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
   *     Release app incarnation
   */
  static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
  {
         struct ip_vs_protocol *pp;
  
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
                 return;
  
         if (pp->unregister_app)
-               pp->unregister_app(inc);
+               pp->unregister_app(net, inc);
  
         IP_VS_DBG(9, "%s App %s:%u unregistered\n",
                   pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
   *     Register an application incarnation in protocol applications
   */
  int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+                      __u16 port)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         int result;
  
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
  
-       result = ip_vs_app_inc_new(app, proto, port);
+       result = ip_vs_app_inc_new(net, app, proto, port);
  
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
  
         return result;
  }
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
  /*
   *     ip_vs_app registration routine
   */
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         /* increase the module use count */
         ip_vs_use_count_inc();
  
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
  
-       list_add(&app->a_list, &ip_vs_app_list);
+       list_add(&app->a_list, &ipvs->app_list);
  
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
  
         return 0;
  }
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
   *     ip_vs_app unregistration routine
   *     We are sure there are no app incarnations attached to services
   */
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         struct ip_vs_app *inc, *nxt;
  
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
  
         list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-               ip_vs_app_inc_release(inc);
+               ip_vs_app_inc_release(net, inc);
         }
  
         list_del(&app->a_list);
  
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
  
         /* decrease the module use count */
         ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
  /*
   *     Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
   */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+                  struct ip_vs_protocol *pp)
  {
         return pp->app_conn_bind(cp);
  }
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
   *     /proc/net/ip_vs_app entry function
   */
  
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
  {
         struct ip_vs_app *app, *inc;
  
-       list_for_each_entry(app, &ip_vs_app_list, a_list) {
+       list_for_each_entry(app, &ipvs->app_list, a_list) {
                 list_for_each_entry(inc, &app->incs_list, a_list) {
                         if (pos-- == 0)
                                 return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
  
  static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
  {
-       mutex_lock(&__ip_vs_app_mutex);
+       struct net *net = seq_file_net(seq);
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
-       return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+       mutex_lock(&ipvs->app_mutex);
+
+       return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
  }
  
  static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
         struct ip_vs_app *inc, *app;
         struct list_head *e;
+       struct net *net = seq_file_net(seq);
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
         ++*pos;
         if (v == SEQ_START_TOKEN)
-               return ip_vs_app_idx(0);
+               return ip_vs_app_idx(ipvs, 0);
  
         inc = v;
         app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
                 return list_entry(e, struct ip_vs_app, a_list);
  
         /* go on to next application */
-       for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+       for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
                 app = list_entry(e, struct ip_vs_app, a_list);
                 list_for_each_entry(inc, &app->incs_list, a_list) {
                         return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  
  static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
  {
-       mutex_unlock(&__ip_vs_app_mutex);
+       struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+       mutex_unlock(&ipvs->app_mutex);
  }
  
  static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
  
  static int ip_vs_app_open(struct inode *inode, struct file *file)
  {
-       return seq_open(file, &ip_vs_app_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+                           sizeof(struct seq_net_private));
  }
  
  static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
  };
  #endif
  
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
  {
-       /* we will replace it with proc_net_ipvs_create() soon */
-       proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->app_list);
+       __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
+       proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
         return 0;
  }
  
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+       proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+       .init = __ip_vs_app_init,
+       .exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_app_ops);
+       return rv;
+}
+
  
  void ip_vs_app_cleanup(void)
  {
-       proc_net_remove(&init_net, "ip_vs_app");
+       unregister_pernet_subsys(&ip_vs_app_ops);
  }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c

index e9adecd..83233fe 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -48,35 +48,32 @@
  /*
   * Connection hash size. Default is what was selected at compile time.
  */
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
  module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
  MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
  
  /* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
  
  /*
   *  Connection hash table: for input and output packets lookups of IPVS
   */
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
  
  /*  SLAB cache for IPVS connections */
  static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
  
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
  /*  counter for no client port connections */
  static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
  
  /* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
  
  /*
   *  Fine locking granularity for big connection hash table
   */
-#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_BITS  5
  #define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
  #define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
  
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
  /*
   *     Returns hash value for IPVS connection entry
   */
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
                                        const union nf_inet_addr *addr,
                                        __be16 port)
  {
  #ifdef CONFIG_IP_VS_IPV6
         if (af == AF_INET6)
-               return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
-                                   (__force u32)port, proto, ip_vs_conn_rnd)
-                       & ip_vs_conn_tab_mask;
+               return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+                                   (__force u32)port, proto, ip_vs_conn_rnd) ^
+                       ((size_t)net>>8)) & ip_vs_conn_tab_mask;
  #endif
-       return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
-                           ip_vs_conn_rnd)
-               & ip_vs_conn_tab_mask;
+       return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+                           ip_vs_conn_rnd) ^
+               ((size_t)net>>8)) & ip_vs_conn_tab_mask;
  }
  
  static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
                 port = p->vport;
         }
  
-       return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+       return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
  }
  
  static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
  {
         struct ip_vs_conn_param p;
  
-       ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
-                             NULL, 0, &p);
+       ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+                             &cp->caddr, cp->cport, NULL, 0, &p);
  
-       if (cp->dest && cp->dest->svc->pe) {
-               p.pe = cp->dest->svc->pe;
+       if (cp->pe) {
+               p.pe = cp->pe;
                 p.pe_data = cp->pe_data;
                 p.pe_data_len = cp->pe_data_len;
         }
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
  }
  
  /*
- *     Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *     Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
   *     returns bool success.
   */
  static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
  
         list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                 if (cp->af == p->af &&
+                   p->cport == cp->cport && p->vport == cp->vport &&
                     ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
                     ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
-                   p->cport == cp->cport && p->vport == cp->vport &&
                     ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-                   p->protocol == cp->protocol) {
+                   p->protocol == cp->protocol &&
+                   ip_vs_conn_net_eq(cp, p->net)) {
                         /* HIT */
                         atomic_inc(&cp->refcnt);
                         ct_read_unlock(hash);
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
                             struct ip_vs_conn_param *p)
  {
         __be16 _ports[2], *pptr;
+       struct net *net = skb_net(skb);
  
         pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
         if (pptr == NULL)
                 return 1;
  
         if (likely(!inverse))
-               ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
-                                     &iph->daddr, pptr[1], p);
+               ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+                                     pptr[0], &iph->daddr, pptr[1], p);
         else
-               ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
-                                     &iph->saddr, pptr[0], p);
+               ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+                                     pptr[1], &iph->saddr, pptr[0], p);
         return 0;
  }
  
  struct ip_vs_conn *
  ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-                       struct ip_vs_protocol *pp,
                         const struct ip_vs_iphdr *iph,
                         unsigned int proto_off, int inverse)
  {
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
         ct_read_lock(hash);
  
         list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+               if (!ip_vs_conn_net_eq(cp, p->net))
+                       continue;
                 if (p->pe_data && p->pe->ct_match) {
-                       if (p->pe->ct_match(p, cp))
+                       if (p->pe == cp->pe && p->pe->ct_match(p, cp))
                                 goto out;
                         continue;
                 }
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
  
         list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                 if (cp->af == p->af &&
+                   p->vport == cp->cport && p->cport == cp->dport &&
                     ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
                     ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
-                   p->vport == cp->cport && p->cport == cp->dport &&
-                   p->protocol == cp->protocol) {
+                   p->protocol == cp->protocol &&
+                   ip_vs_conn_net_eq(cp, p->net)) {
                         /* HIT */
                         atomic_inc(&cp->refcnt);
                         ret = cp;
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
  
  struct ip_vs_conn *
  ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-                        struct ip_vs_protocol *pp,
                          const struct ip_vs_iphdr *iph,
                          unsigned int proto_off, int inverse)
  {
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
         struct ip_vs_dest *dest;
  
         if ((cp) && (!cp->dest)) {
-               dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
-                                      &cp->vaddr, cp->vport,
-                                      cp->protocol);
+               dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+                                      cp->dport, &cp->vaddr, cp->vport,
+                                      cp->protocol, cp->fwmark);
                 ip_vs_bind_dest(cp, dest);
                 return dest;
         } else
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
  int ip_vs_check_template(struct ip_vs_conn *ct)
  {
         struct ip_vs_dest *dest = ct->dest;
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
  
         /*
          * Checking the dest server status.
          */
         if ((dest == NULL) ||
             !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-           (sysctl_ip_vs_expire_quiescent_template &&
+           (ipvs->sysctl_expire_quiescent_template &&
              (atomic_read(&dest->weight) == 0))) {
                 IP_VS_DBG_BUF(9, "check_template: dest not available for "
                               "protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
  static void ip_vs_conn_expire(unsigned long data)
  {
         struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
  
         cp->timeout = 60*HZ;
  
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data)
                 if (cp->flags & IP_VS_CONN_F_NFCT)
                         ip_vs_conn_drop_conntrack(cp);
  
+               ip_vs_pe_put(cp->pe);
                 kfree(cp->pe_data);
                 if (unlikely(cp->app != NULL))
                         ip_vs_unbind_app(cp);
                 ip_vs_unbind_dest(cp);
                 if (cp->flags & IP_VS_CONN_F_NO_CPORT)
                         atomic_dec(&ip_vs_conn_no_cport_cnt);
-               atomic_dec(&ip_vs_conn_count);
+               atomic_dec(&ipvs->conn_count);
  
                 kmem_cache_free(ip_vs_conn_cachep, cp);
                 return;
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
  struct ip_vs_conn *
  ip_vs_conn_new(const struct ip_vs_conn_param *p,
                const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-              struct ip_vs_dest *dest)
+              struct ip_vs_dest *dest, __u32 fwmark)
  {
         struct ip_vs_conn *cp;
-       struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+       struct netns_ipvs *ipvs = net_ipvs(p->net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+                                                          p->protocol);
  
         cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
         if (cp == NULL) {
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  
         INIT_LIST_HEAD(&cp->c_list);
         setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+       ip_vs_conn_net_set(cp, p->net);
         cp->af             = p->af;
         cp->protocol       = p->protocol;
         ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
                         &cp->daddr, daddr);
         cp->dport          = dport;
         cp->flags          = flags;
-       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+       cp->fwmark         = fwmark;
+       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+               ip_vs_pe_get(p->pe);
+               cp->pe = p->pe;
                 cp->pe_data = p->pe_data;
                 cp->pe_data_len = p->pe_data_len;
         }
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
         atomic_set(&cp->n_control, 0);
         atomic_set(&cp->in_pkts, 0);
  
-       atomic_inc(&ip_vs_conn_count);
+       atomic_inc(&ipvs->conn_count);
         if (flags & IP_VS_CONN_F_NO_CPORT)
                 atomic_inc(&ip_vs_conn_no_cport_cnt);
  
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  #endif
                 ip_vs_bind_xmit(cp);
  
-       if (unlikely(pp && atomic_read(&pp->appcnt)))
-               ip_vs_bind_app(cp, pp);
+       if (unlikely(pd && atomic_read(&pd->appcnt)))
+               ip_vs_bind_app(cp, pd->pp);
  
         /*
          * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
          * IP_VS_CONN_F_ONE_PACKET too.
          */
  
-       if (ip_vs_conntrack_enabled())
+       if (ip_vs_conntrack_enabled(ipvs))
                 cp->flags |= IP_VS_CONN_F_NFCT;
  
         /* Hash it in the ip_vs_conn_tab finally */
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
   *     /proc/net/ip_vs_conn entries
   */
  #ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+       struct seq_net_private p;
+       struct list_head *l;
+};
  
  static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
  {
         int idx;
         struct ip_vs_conn *cp;
+       struct ip_vs_iter_state *iter = seq->private;
  
         for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
                 ct_read_lock_bh(idx);
                 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
                         if (pos-- == 0) {
-                               seq->private = &ip_vs_conn_tab[idx];
+                               iter->l = &ip_vs_conn_tab[idx];
                         return cp;
                         }
                 }
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
  
  static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
  {
-       seq->private = NULL;
+       struct ip_vs_iter_state *iter = seq->private;
+
+       iter->l = NULL;
         return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
  }
  
  static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
  {
         struct ip_vs_conn *cp = v;
-       struct list_head *e, *l = seq->private;
+       struct ip_vs_iter_state *iter = seq->private;
+       struct list_head *e, *l = iter->l;
         int idx;
  
         ++*pos;
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
         while (++idx < ip_vs_conn_tab_size) {
                 ct_read_lock_bh(idx);
                 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-                       seq->private = &ip_vs_conn_tab[idx];
+                       iter->l = &ip_vs_conn_tab[idx];
                         return cp;
                 }
                 ct_read_unlock_bh(idx);
         }
-       seq->private = NULL;
+       iter->l = NULL;
         return NULL;
  }
  
  static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
  {
-       struct list_head *l = seq->private;
+       struct ip_vs_iter_state *iter = seq->private;
+       struct list_head *l = iter->l;
  
         if (l)
                 ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
     "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires PEName PEData\n");
         else {
                 const struct ip_vs_conn *cp = v;
+               struct net *net = seq_file_net(seq);
                 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
                 size_t len = 0;
  
-               if (cp->dest && cp->pe_data &&
-                   cp->dest->svc->pe->show_pe_data) {
+               if (!ip_vs_conn_net_eq(cp, net))
+                       return 0;
+               if (cp->pe_data) {
                         pe_data[0] = ' ';
-                       len = strlen(cp->dest->svc->pe->name);
-                       memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+                       len = strlen(cp->pe->name);
+                       memcpy(pe_data + 1, cp->pe->name, len);
                         pe_data[len + 1] = ' ';
                         len += 2;
-                       len += cp->dest->svc->pe->show_pe_data(cp,
-                                                              pe_data + len);
+                       len += cp->pe->show_pe_data(cp, pe_data + len);
                 }
                 pe_data[len] = '\0';
  
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
  
  static int ip_vs_conn_open(struct inode *inode, struct file *file)
  {
-       return seq_open(file, &ip_vs_conn_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+                           sizeof(struct ip_vs_iter_state));
  }
  
  static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
     "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
         else {
                 const struct ip_vs_conn *cp = v;
+               struct net *net = seq_file_net(seq);
+
+               if (!ip_vs_conn_net_eq(cp, net))
+                       return 0;
  
  #ifdef CONFIG_IP_VS_IPV6
                 if (cp->af == AF_INET6)
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
  
  static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
  {
-       return seq_open(file, &ip_vs_conn_sync_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+                           sizeof(struct ip_vs_iter_state));
  }
  
  static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
  }
  
  /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
  {
         int idx;
         struct ip_vs_conn *cp;
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void)
                         if (cp->flags & IP_VS_CONN_F_TEMPLATE)
                                 /* connection template */
                                 continue;
-
+                       if (!ip_vs_conn_net_eq(cp, net))
+                               continue;
                         if (cp->protocol == IPPROTO_TCP) {
                                 switch(cp->state) {
                                 case IP_VS_TCP_S_SYN_RECV:
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void)
  /*
   *      Flush all the connection entries in the ip_vs_conn_tab
   */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
  {
         int idx;
         struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
-  flush_again:
+flush_again:
         for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
                 /*
                  *  Lock is actually needed in this loop.
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void)
                 ct_write_lock_bh(idx);
  
                 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+                       if (!ip_vs_conn_net_eq(cp, net))
+                               continue;
                         IP_VS_DBG(4, "del connection\n");
                         ip_vs_conn_expire_now(cp);
                         if (cp->control) {
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void)
  
         /* the counter may be not NULL, because maybe some conn entries
            are run by slow timer handler or unhashed but still referred */
-       if (atomic_read(&ip_vs_conn_count) != 0) {
+       if (atomic_read(&ipvs->conn_count) != 0) {
                 schedule();
                 goto flush_again;
         }
  }
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       atomic_set(&ipvs->conn_count, 0);
+
+       proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+       proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+       return 0;
+}
  
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+       /* flush all the connection entries first */
+       ip_vs_conn_flush(net);
+       proc_net_remove(net, "ip_vs_conn");
+       proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+       .init = __ip_vs_conn_init,
+       .exit = __ip_vs_conn_cleanup,
+};
  
  int __init ip_vs_conn_init(void)
  {
         int idx;
+       int retc;
  
         /* Compute size and mask */
         ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void)
                 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
         }
  
-       proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-       proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+       retc = register_pernet_subsys(&ipvs_conn_ops);
  
         /* calculate the random value for connection hash */
         get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
  
-       return 0;
+       return retc;
  }
  
-
  void ip_vs_conn_cleanup(void)
  {
-       /* flush all the connection entries first */
-       ip_vs_conn_flush();
-
+       unregister_pernet_subsys(&ipvs_conn_ops);
         /* Release the empty cache */
         kmem_cache_destroy(ip_vs_conn_cachep);
-       proc_net_remove(&init_net, "ip_vs_conn");
-       proc_net_remove(&init_net, "ip_vs_conn_sync");
         vfree(ip_vs_conn_tab);
  }
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c

index b4e51e9..f36a84f 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -41,6 +41,7 @@
  #include <net/icmp.h>                   /* for icmp_send */
  #include <net/route.h>
  #include <net/ip6_checksum.h>
+#include <net/netns/generic.h>         /* net_generic() */
  
  #include <linux/netfilter.h>
  #include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
  EXPORT_SYMBOL(ip_vs_get_debug_level);
  #endif
  
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
  
  /* ID used in ICMP lookups */
  #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
  ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
  {
         struct ip_vs_dest *dest = cp->dest;
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
         if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               spin_lock(&dest->stats.lock);
-               dest->stats.ustats.inpkts++;
-               dest->stats.ustats.inbytes += skb->len;
-               spin_unlock(&dest->stats.lock);
-
-               spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.ustats.inpkts++;
-               dest->svc->stats.ustats.inbytes += skb->len;
-               spin_unlock(&dest->svc->stats.lock);
-
-               spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.ustats.inpkts++;
-               ip_vs_stats.ustats.inbytes += skb->len;
-               spin_unlock(&ip_vs_stats.lock);
+               struct ip_vs_cpu_stats *s;
+
+               s = this_cpu_ptr(dest->stats.cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(ipvs->cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
         }
  }
  
@@ -131,21 +145,28 @@ static inline void
  ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
  {
         struct ip_vs_dest *dest = cp->dest;
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
         if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               spin_lock(&dest->stats.lock);
-               dest->stats.ustats.outpkts++;
-               dest->stats.ustats.outbytes += skb->len;
-               spin_unlock(&dest->stats.lock);
-
-               spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.ustats.outpkts++;
-               dest->svc->stats.ustats.outbytes += skb->len;
-               spin_unlock(&dest->svc->stats.lock);
-
-               spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.ustats.outpkts++;
-               ip_vs_stats.ustats.outbytes += skb->len;
-               spin_unlock(&ip_vs_stats.lock);
+               struct ip_vs_cpu_stats *s;
+
+               s = this_cpu_ptr(dest->stats.cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(ipvs->cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
         }
  }
  
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
  static inline void
  ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
  {
-       spin_lock(&cp->dest->stats.lock);
-       cp->dest->stats.ustats.conns++;
-       spin_unlock(&cp->dest->stats.lock);
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
+       struct ip_vs_cpu_stats *s;
+
+       s = this_cpu_ptr(cp->dest->stats.cpustats);
+       s->ustats.conns++;
  
-       spin_lock(&svc->stats.lock);
-       svc->stats.ustats.conns++;
-       spin_unlock(&svc->stats.lock);
+       s = this_cpu_ptr(svc->stats.cpustats);
+       s->ustats.conns++;
  
-       spin_lock(&ip_vs_stats.lock);
-       ip_vs_stats.ustats.conns++;
-       spin_unlock(&ip_vs_stats.lock);
+       s = this_cpu_ptr(ipvs->cpustats);
+       s->ustats.conns++;
  }
  
  
  static inline int
  ip_vs_set_state(struct ip_vs_conn *cp, int direction,
                 const struct sk_buff *skb,
-               struct ip_vs_protocol *pp)
+               struct ip_vs_proto_data *pd)
  {
-       if (unlikely(!pp->state_transition))
+       if (unlikely(!pd->pp->state_transition))
                 return 0;
-       return pp->state_transition(cp, direction, skb, pp);
+       return pd->pp->state_transition(cp, direction, skb, pd);
  }
  
-static inline void
+static inline int
  ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
                               struct sk_buff *skb, int protocol,
                               const union nf_inet_addr *caddr, __be16 cport,
                               const union nf_inet_addr *vaddr, __be16 vport,
                               struct ip_vs_conn_param *p)
  {
-       ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+       ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+                             vport, p);
         p->pe = svc->pe;
         if (p->pe && p->pe->fill_param)
-               p->pe->fill_param(p, skb);
+               return p->pe->fill_param(p, skb);
+
+       return 0;
  }
  
  /*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
  static struct ip_vs_conn *
  ip_vs_sched_persist(struct ip_vs_service *svc,
                     struct sk_buff *skb,
-                   __be16 ports[2])
+                   __be16 src_port, __be16 dst_port, int *ignored)
  {
         struct ip_vs_conn *cp = NULL;
         struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
  
         IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
                       "mnet %s\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
                       IP_VS_DBG_ADDR(svc->af, &snet));
  
         /*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
                 __be16 vport = 0;
  
-               if (ports[1] == svc->port) {
+               if (dst_port == svc->port) {
                         /* non-FTP template:
                          * <protocol, caddr, 0, vaddr, vport, daddr, dport>
                          * FTP template:
                          * <protocol, caddr, 0, vaddr, 0, daddr, 0>
                          */
                         if (svc->port != FTPPORT)
-                               vport = ports[1];
+                               vport = dst_port;
                 } else {
                         /* Note: persistent fwmark-based services and
                          * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                 vaddr = &fwmark;
                         }
                 }
-               ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-                                             vaddr, vport, &param);
+               /* return *ignored = -1 so NF_DROP can be used */
+               if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+                                                 vaddr, vport, &param) < 0) {
+                       *ignored = -1;
+                       return NULL;
+               }
         }
  
         /* Check if a template already exists */
         ct = ip_vs_ct_in_get(&param);
         if (!ct || !ip_vs_check_template(ct)) {
-               /* No template found or the dest of the connection
+               /*
+                * No template found or the dest of the connection
                  * template is not available.
+                * return *ignored=0 i.e. ICMP and NF_DROP
                  */
                 dest = svc->scheduler->schedule(svc, skb);
                 if (!dest) {
                         IP_VS_DBG(1, "p-schedule: no dest found.\n");
                         kfree(param.pe_data);
+                       *ignored = 0;
                         return NULL;
                 }
  
-               if (ports[1] == svc->port && svc->port != FTPPORT)
+               if (dst_port == svc->port && svc->port != FTPPORT)
                         dport = dest->port;
  
                 /* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                  * and thus param.pe_data will be destroyed
                  * when the template expires */
                 ct = ip_vs_conn_new(&param, &dest->addr, dport,
-                                   IP_VS_CONN_F_TEMPLATE, dest);
+                                   IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
                 if (ct == NULL) {
                         kfree(param.pe_data);
+                       *ignored = -1;
                         return NULL;
                 }
  
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 kfree(param.pe_data);
         }
  
-       dport = ports[1];
+       dport = dst_port;
         if (dport == svc->port && dest->port)
                 dport = dest->port;
  
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
         /*
          *    Create a new connection according to the template
          */
-       ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-                             &iph.daddr, ports[1], &param);
-       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+       ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+                             src_port, &iph.daddr, dst_port, &param);
+
+       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
         if (cp == NULL) {
                 ip_vs_conn_put(ct);
+               *ignored = -1;
                 return NULL;
         }
  
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
   *  It selects a server according to the virtual service, and
   *  creates a connection entry.
   *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler can not find destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but fatal error occurred, eg.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
   */
  struct ip_vs_conn *
  ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-              struct ip_vs_protocol *pp, int *ignored)
+              struct ip_vs_proto_data *pd, int *ignored)
  {
+       struct ip_vs_protocol *pp = pd->pp;
         struct ip_vs_conn *cp = NULL;
         struct ip_vs_iphdr iph;
         struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
         }
  
         /*
-        * Do not schedule replies from local real server. It is risky
-        * for fwmark services but mostly for persistent services.
+        *    Do not schedule replies from local real server.
          */
         if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-           (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
-           (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+           (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
                 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
                               "Not scheduling reply for existing connection");
                 __ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
         /*
          *    Persistent service
          */
-       if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-               *ignored = 0;
-               return ip_vs_sched_persist(svc, skb, pptr);
-       }
+       if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+               return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+       *ignored = 0;
  
         /*
          *    Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                 return NULL;
         }
  
-       *ignored = 0;
-
         dest = svc->scheduler->schedule(svc, skb);
         if (dest == NULL) {
                 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
          */
         {
                 struct ip_vs_conn_param p;
-               ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
-                                     pptr[0], &iph.daddr, pptr[1], &p);
+
+               ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+                                     &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+                                     &p);
                 cp = ip_vs_conn_new(&p, &dest->addr,
                                     dest->port ? dest->port : pptr[1],
-                                   flags, dest);
-               if (!cp)
+                                   flags, dest, skb->mark);
+               if (!cp) {
+                       *ignored = -1;
                         return NULL;
+               }
         }
  
         IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
   *  no destination is available for a new connection.
   */
  int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-               struct ip_vs_protocol *pp)
+               struct ip_vs_proto_data *pd)
  {
+       struct net *net;
+       struct netns_ipvs *ipvs;
         __be16 _ports[2], *pptr;
         struct ip_vs_iphdr iph;
         int unicast;
+
         ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
  
         pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                 ip_vs_service_put(svc);
                 return NF_DROP;
         }
+       net = skb_net(skb);
  
  #ifdef CONFIG_IP_VS_IPV6
         if (svc->af == AF_INET6)
                 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
         else
  #endif
-               unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+               unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
  
         /* if it is fwmark-based service, the cache_bypass sysctl is up
            and the destination is a non-local unicast, then create
            a cache_bypass connection entry */
-       if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+       ipvs = net_ipvs(net);
+       if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
                 int ret, cs;
                 struct ip_vs_conn *cp;
                 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
                 {
                         struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(svc->af, iph.protocol,
+                       ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
                                               &iph.saddr, pptr[0],
                                               &iph.daddr, pptr[1], &p);
                         cp = ip_vs_conn_new(&p, &daddr, 0,
                                             IP_VS_CONN_F_BYPASS | flags,
-                                           NULL);
+                                           NULL, skb->mark);
                         if (!cp)
                                 return NF_DROP;
                 }
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                 ip_vs_in_stats(cp, skb);
  
                 /* set state */
-               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
  
                 /* transmit the first SYN packet */
-               ret = cp->packet_xmit(skb, cp, pp);
+               ret = cp->packet_xmit(skb, cp, pd->pp);
                 /* do not touch skb anymore */
  
                 atomic_inc(&cp->in_pkts);
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
                                 struct ip_vs_protocol *pp,
                                 unsigned int offset, unsigned int ihl)
  {
+       struct netns_ipvs *ipvs;
         unsigned int verdict = NF_DROP;
  
         if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
         if (!skb_make_writable(skb, offset))
                 goto out;
  
+       ipvs = net_ipvs(skb_net(skb));
+
  #ifdef CONFIG_IP_VS_IPV6
         if (af == AF_INET6)
                 ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
  
  #ifdef CONFIG_IP_VS_IPV6
         if (af == AF_INET6) {
-               if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+               if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
                         goto out;
         } else
  #endif
-               if ((sysctl_ip_vs_snat_reroute ||
+               if ((ipvs->sysctl_snat_reroute ||
                      skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
                     ip_route_me_harder(skb, RTN_LOCAL) != 0)
                         goto out;
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
  
         ip_vs_fill_iphdr(AF_INET, cih, &ciph);
         /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
         if (!cp)
                 return NF_ACCEPT;
  
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
  
         ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
         /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
         if (!cp)
                 return NF_ACCEPT;
  
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
   * Used for NAT and local client.
   */
  static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                 struct ip_vs_conn *cp, int ihl)
  {
+       struct ip_vs_protocol *pp = pd->pp;
+       struct netns_ipvs *ipvs;
+
         IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
  
         if (!skb_make_writable(skb, ihl))
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
          * if it came from this machine itself.  So re-compute
          * the routing information.
          */
+       ipvs = net_ipvs(skb_net(skb));
+
  #ifdef CONFIG_IP_VS_IPV6
         if (af == AF_INET6) {
-               if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+               if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
                         goto drop;
         } else
  #endif
-               if ((sysctl_ip_vs_snat_reroute ||
+               if ((ipvs->sysctl_snat_reroute ||
                      skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
                     ip_route_me_harder(skb, RTN_LOCAL) != 0)
                         goto drop;
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
         IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
  
         ip_vs_out_stats(cp, skb);
-       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
         skb->ipvs_property = 1;
         if (!(cp->flags & IP_VS_CONN_F_NFCT))
                 ip_vs_notrack(skb);
@@ -999,9 +1062,12 @@ drop:
  static unsigned int
  ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
  {
+       struct net *net = NULL;
         struct ip_vs_iphdr iph;
         struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
         struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs;
  
         EnterFunction(11);
  
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
         if (unlikely(!skb_dst(skb)))
                 return NF_ACCEPT;
  
+       net = skb_net(skb);
         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
  #ifdef CONFIG_IP_VS_IPV6
         if (af == AF_INET6) {
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
                         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
                 }
  
-       pp = ip_vs_proto_get(iph.protocol);
-       if (unlikely(!pp))
+       pd = ip_vs_proto_data_get(net, iph.protocol);
+       if (unlikely(!pd))
                 return NF_ACCEPT;
+       pp = pd->pp;
  
         /* reassemble IP fragments */
  #ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
         /*
          * Check if the packet belongs to an existing entry
          */
-       cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+       cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+       ipvs = net_ipvs(net);
  
         if (likely(cp))
-               return handle_response(af, skb, pp, cp, iph.len);
-       if (sysctl_ip_vs_nat_icmp_send &&
+               return handle_response(af, skb, pd, cp, iph.len);
+       if (ipvs->sysctl_nat_icmp_send &&
             (pp->protocol == IPPROTO_TCP ||
              pp->protocol == IPPROTO_UDP ||
              pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
                                           sizeof(_ports), _ports);
                 if (pptr == NULL)
                         return NF_ACCEPT;       /* Not for me */
-               if (ip_vs_lookup_real_service(af, iph.protocol,
+               if (ip_vs_lookup_real_service(net, af, iph.protocol,
                                               &iph.saddr,
                                               pptr[0])) {
                         /*
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
  static int
  ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
  {
+       struct net *net = NULL;
         struct iphdr *iph;
         struct icmphdr  _icmph, *ic;
         struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
         struct ip_vs_iphdr ciph;
         struct ip_vs_conn *cp;
         struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
         unsigned int offset, ihl, verdict;
         union nf_inet_addr snet;
  
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
         if (cih == NULL)
                 return NF_ACCEPT; /* The packet looks wrong, ignore */
  
-       pp = ip_vs_proto_get(cih->protocol);
-       if (!pp)
+       net = skb_net(skb);
+       pd = ip_vs_proto_data_get(net, cih->protocol);
+       if (!pd)
                 return NF_ACCEPT;
+       pp = pd->pp;
  
         /* Is the embedded protocol header present? */
         if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
  
         ip_vs_fill_iphdr(AF_INET, cih, &ciph);
         /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
         if (!cp) {
                 /* The packet could also belong to a local client */
-               cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+               cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
                 if (cp) {
                         snet.ip = iph->saddr;
                         return handle_response_icmp(AF_INET, skb, &snet,
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
  static int
  ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
  {
+       struct net *net = NULL;
         struct ipv6hdr *iph;
         struct icmp6hdr _icmph, *ic;
         struct ipv6hdr  _ciph, *cih;    /* The ip header contained
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
         struct ip_vs_iphdr ciph;
         struct ip_vs_conn *cp;
         struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
         unsigned int offset, verdict;
         union nf_inet_addr snet;
         struct rt6_info *rt;
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
         if (cih == NULL)
                 return NF_ACCEPT; /* The packet looks wrong, ignore */
  
-       pp = ip_vs_proto_get(cih->nexthdr);
-       if (!pp)
+       net = skb_net(skb);
+       pd = ip_vs_proto_data_get(net, cih->nexthdr);
+       if (!pd)
                 return NF_ACCEPT;
+       pp = pd->pp;
  
         /* Is the embedded protocol header present? */
         /* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
  
         ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
         /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
         if (!cp) {
                 /* The packet could also belong to a local client */
-               cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+               cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
                 if (cp) {
                         ipv6_addr_copy(&snet.in6, &iph->saddr);
                         return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
  static unsigned int
  ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
  {
+       struct net *net;
         struct ip_vs_iphdr iph;
         struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
         struct ip_vs_conn *cp;
         int ret, restart, pkts;
+       struct netns_ipvs *ipvs;
  
         /* Already marked as IPVS request or reply? */
         if (skb->ipvs_property)
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
                         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
                 }
  
+       net = skb_net(skb);
         /* Protocol supported? */
-       pp = ip_vs_proto_get(iph.protocol);
-       if (unlikely(!pp))
+       pd = ip_vs_proto_data_get(net, iph.protocol);
+       if (unlikely(!pd))
                 return NF_ACCEPT;
-
+       pp = pd->pp;
         /*
          * Check if the packet belongs to an existing connection entry
          */
-       cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+       cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
  
         if (unlikely(!cp)) {
                 int v;
  
-               if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+               if (!pp->conn_schedule(af, skb, pd, &v, &cp))
                         return v;
         }
  
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
         }
  
         IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+       net = skb_net(skb);
+       ipvs = net_ipvs(net);
         /* Check the server status */
         if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                 /* the destination server is not available */
  
-               if (sysctl_ip_vs_expire_nodest_conn) {
+               if (ipvs->sysctl_expire_nodest_conn) {
                         /* try to expire the connection immediately */
                         ip_vs_conn_expire_now(cp);
                 }
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
         }
  
         ip_vs_in_stats(cp, skb);
-       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
         if (cp->packet_xmit)
                 ret = cp->packet_xmit(skb, cp, pp);
                 /* do not touch skb anymore */
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
          *
          * Sync connection if it is about to close to
          * encorage the standby servers to update the connections timeout
+        *
+        * For ONE_PKT let ip_vs_sync_conn() do the filter work.
          */
-       pkts = atomic_add_return(1, &cp->in_pkts);
-       if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               pkts = ipvs->sysctl_sync_threshold[0];
+       else
+               pkts = atomic_add_return(1, &cp->in_pkts);
+
+       if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
             cp->protocol == IPPROTO_SCTP) {
                 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-                       (pkts % sysctl_ip_vs_sync_threshold[1]
-                        == sysctl_ip_vs_sync_threshold[0])) ||
+                       (pkts % ipvs->sysctl_sync_threshold[1]
+                        == ipvs->sysctl_sync_threshold[0])) ||
                                 (cp->old_state != cp->state &&
                                  ((cp->state == IP_VS_SCTP_S_CLOSED) ||
                                   (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
                                   (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-                       ip_vs_sync_conn(cp);
+                       ip_vs_sync_conn(net, cp);
                         goto out;
                 }
         }
  
         /* Keep this block last: TCP and others with pp->num_states <= 1 */
-       else if (af == AF_INET &&
-           (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+       else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
             (((cp->protocol != IPPROTO_TCP ||
                cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-             (pkts % sysctl_ip_vs_sync_threshold[1]
-              == sysctl_ip_vs_sync_threshold[0])) ||
+             (pkts % ipvs->sysctl_sync_threshold[1]
+              == ipvs->sysctl_sync_threshold[0])) ||
              ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
               ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
                (cp->state == IP_VS_TCP_S_CLOSE) ||
                (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
                (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-               ip_vs_sync_conn(cp);
+               ip_vs_sync_conn(net, cp);
  out:
         cp->old_state = cp->state;
  
@@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
         },
  #endif
  };
+/*
+ *     Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+       struct netns_ipvs *ipvs;
+
+       ipvs = net_generic(net, ip_vs_net_id);
+       if (ipvs == NULL) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       ipvs->net = net;
+       /* Counters used for creating unique names */
+       ipvs->gen = atomic_read(&ipvs_netns_cnt);
+       atomic_inc(&ipvs_netns_cnt);
+       net->ipvs = ipvs;
+       printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",
+                        sizeof(struct netns_ipvs), ipvs->gen);
+       return 0;
+}
+
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
+       IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+       .init = __ip_vs_init,
+       .exit = __ip_vs_cleanup,
+       .id   = &ip_vs_net_id,
+       .size = sizeof(struct netns_ipvs),
+};
  
  /*
   *     Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
  {
         int ret;
  
-       ip_vs_estimator_init();
+       ret = register_pernet_subsys(&ipvs_core_ops);   /* Alloc ip_vs struct */
+       if (ret < 0)
+               return ret;
  
+       ip_vs_estimator_init();
         ret = ip_vs_control_init();
         if (ret < 0) {
                 pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
                 goto cleanup_app;
         }
  
+       ret = ip_vs_sync_init();
+       if (ret < 0) {
+               pr_err("can't setup sync data.\n");
+               goto cleanup_conn;
+       }
+
         ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
         if (ret < 0) {
                 pr_err("can't register hooks.\n");
-               goto cleanup_conn;
+               goto cleanup_sync;
         }
  
         pr_info("ipvs loaded.\n");
         return ret;
  
+cleanup_sync:
+       ip_vs_sync_cleanup();
    cleanup_conn:
         ip_vs_conn_cleanup();
    cleanup_app:
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
         ip_vs_control_cleanup();
    cleanup_estimator:
         ip_vs_estimator_cleanup();
+       unregister_pernet_subsys(&ipvs_core_ops);       /* free ip_vs struct */
         return ret;
  }
  
  static void __exit ip_vs_cleanup(void)
  {
         nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+       ip_vs_sync_cleanup();
         ip_vs_conn_cleanup();
         ip_vs_app_cleanup();
         ip_vs_protocol_cleanup();
         ip_vs_control_cleanup();
         ip_vs_estimator_cleanup();
+       unregister_pernet_subsys(&ipvs_core_ops);       /* free ip_vs struct */
         pr_info("ipvs unloaded.\n");
  }
  
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c

index 22f7ad5..09ca2ce 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
  #include <linux/mutex.h>
  
  #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
  #include <net/ip.h>
  #ifdef CONFIG_IP_VS_IPV6
  #include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
  /* lock for service table */
  static DEFINE_RWLOCK(__ip_vs_svc_lock);
  
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
  /* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
  
  #ifdef CONFIG_IP_VS_DEBUG
  static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void)
  
  #ifdef CONFIG_IP_VS_IPV6
  /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+                                   const struct in6_addr *addr)
  {
         struct rt6_info *rt;
         struct flowi fl = {
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
                 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
         };
  
-       rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
                         return 1;
  
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
   *     update_defense_level is called from keventd and from sysctl,
   *     so it needs to protect itself from softirqs
   */
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
  {
         struct sysinfo i;
         static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
         /* si_swapinfo(&i); */
         /* availmem = availmem - (i.totalswap - i.freeswap); */
  
-       nomem = (availmem < sysctl_ip_vs_amemthresh);
+       nomem = (availmem < ipvs->sysctl_amemthresh);
  
         local_bh_disable();
  
         /* drop_entry */
-       spin_lock(&__ip_vs_dropentry_lock);
-       switch (sysctl_ip_vs_drop_entry) {
+       spin_lock(&ipvs->dropentry_lock);
+       switch (ipvs->sysctl_drop_entry) {
         case 0:
-               atomic_set(&ip_vs_dropentry, 0);
+               atomic_set(&ipvs->dropentry, 0);
                 break;
         case 1:
                 if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
-                       sysctl_ip_vs_drop_entry = 2;
+                       atomic_set(&ipvs->dropentry, 1);
+                       ipvs->sysctl_drop_entry = 2;
                 } else {
-                       atomic_set(&ip_vs_dropentry, 0);
+                       atomic_set(&ipvs->dropentry, 0);
                 }
                 break;
         case 2:
                 if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
+                       atomic_set(&ipvs->dropentry, 1);
                 } else {
-                       atomic_set(&ip_vs_dropentry, 0);
-                       sysctl_ip_vs_drop_entry = 1;
+                       atomic_set(&ipvs->dropentry, 0);
+                       ipvs->sysctl_drop_entry = 1;
                 };
                 break;
         case 3:
-               atomic_set(&ip_vs_dropentry, 1);
+               atomic_set(&ipvs->dropentry, 1);
                 break;
         }
-       spin_unlock(&__ip_vs_dropentry_lock);
+       spin_unlock(&ipvs->dropentry_lock);
  
         /* drop_packet */
-       spin_lock(&__ip_vs_droppacket_lock);
-       switch (sysctl_ip_vs_drop_packet) {
+       spin_lock(&ipvs->droppacket_lock);
+       switch (ipvs->sysctl_drop_packet) {
         case 0:
-               ip_vs_drop_rate = 0;
+               ipvs->drop_rate = 0;
                 break;
         case 1:
                 if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
-                       sysctl_ip_vs_drop_packet = 2;
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
+                       ipvs->sysctl_drop_packet = 2;
                 } else {
-                       ip_vs_drop_rate = 0;
+                       ipvs->drop_rate = 0;
                 }
                 break;
         case 2:
                 if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
                 } else {
-                       ip_vs_drop_rate = 0;
-                       sysctl_ip_vs_drop_packet = 1;
+                       ipvs->drop_rate = 0;
+                       ipvs->sysctl_drop_packet = 1;
                 }
                 break;
         case 3:
-               ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+               ipvs->drop_rate = ipvs->sysctl_am_droprate;
                 break;
         }
-       spin_unlock(&__ip_vs_droppacket_lock);
+       spin_unlock(&ipvs->droppacket_lock);
  
         /* secure_tcp */
-       spin_lock(&ip_vs_securetcp_lock);
-       switch (sysctl_ip_vs_secure_tcp) {
+       spin_lock(&ipvs->securetcp_lock);
+       switch (ipvs->sysctl_secure_tcp) {
         case 0:
                 if (old_secure_tcp >= 2)
                         to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
                 if (nomem) {
                         if (old_secure_tcp < 2)
                                 to_change = 1;
-                       sysctl_ip_vs_secure_tcp = 2;
+                       ipvs->sysctl_secure_tcp = 2;
                 } else {
                         if (old_secure_tcp >= 2)
                                 to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
                 } else {
                         if (old_secure_tcp >= 2)
                                 to_change = 0;
-                       sysctl_ip_vs_secure_tcp = 1;
+                       ipvs->sysctl_secure_tcp = 1;
                 }
                 break;
         case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
                         to_change = 1;
                 break;
         }
-       old_secure_tcp = sysctl_ip_vs_secure_tcp;
+       old_secure_tcp = ipvs->sysctl_secure_tcp;
         if (to_change >= 0)
-               ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-       spin_unlock(&ip_vs_securetcp_lock);
+               ip_vs_protocol_timeout_change(ipvs,
+                                             ipvs->sysctl_secure_tcp > 1);
+       spin_unlock(&ipvs->securetcp_lock);
  
         local_bh_enable();
  }
@@ -250,16 +218,16 @@ static void update_defense_level(void)
   *     Timer for checking the defense
   */
  #define DEFENSE_TIMER_PERIOD   1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
  
  static void defense_work_handler(struct work_struct *work)
  {
-       update_defense_level();
-       if (atomic_read(&ip_vs_dropentry))
-               ip_vs_random_dropentry();
+       struct netns_ipvs *ipvs =
+               container_of(work, struct netns_ipvs, defense_work.work);
  
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+       update_defense_level(ipvs);
+       if (atomic_read(&ipvs->dropentry))
+               ip_vs_random_dropentry(ipvs->net);
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
  }
  
  int
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
  /* the service table hashed by fwmark */
  static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
  
-/*
- *     Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *     Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *     FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
  
  /*
   *     Returns hash value for virtual service
   */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-                 __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+                 const union nf_inet_addr *addr, __be16 port)
  {
         register unsigned porth = ntohs(port);
         __be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
                 addr_fold = addr->ip6[0]^addr->ip6[1]^
                             addr->ip6[2]^addr->ip6[3];
  #endif
+       addr_fold ^= ((size_t)net>>8);
  
         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
                 & IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
  /*
   *     Returns hash value of fwmark for virtual service lookup
   */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
  {
-       return fwmark & IP_VS_SVC_TAB_MASK;
+       return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
  }
  
  /*
- *     Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *     Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
   *     or in the ip_vs_svc_fwm_table by fwmark.
   *     Should be called with locked tables.
   */
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
  
         if (svc->fwmark == 0) {
                 /*
-                *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+                *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
                  */
-               hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-                                        svc->port);
+               hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+                                        &svc->addr, svc->port);
                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
         } else {
                 /*
-                *  Hash it by fwmark in ip_vs_svc_fwm_table
+                *  Hash it by fwmark in svc_fwm_table
                  */
-               hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+               hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
         }
  
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
  
  
  /*
- *     Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *     Unhashes a service from svc_table / svc_fwm_table.
   *     Should be called with locked tables.
   */
  static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
         }
  
         if (svc->fwmark == 0) {
-               /* Remove it from the ip_vs_svc_table table */
+               /* Remove it from the svc_table table */
                 list_del(&svc->s_list);
         } else {
-               /* Remove it from the ip_vs_svc_fwm_table table */
+               /* Remove it from the svc_fwm_table table */
                 list_del(&svc->f_list);
         }
  
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
  
  
  /*
- *     Get service by {proto,addr,port} in the service table.
+ *     Get service by {netns, proto,addr,port} in the service table.
   */
  static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-                   __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+                    const union nf_inet_addr *vaddr, __be16 vport)
  {
         unsigned hash;
         struct ip_vs_service *svc;
  
         /* Check for "full" addressed entries */
-       hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+       hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
  
         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
                 if ((svc->af == af)
                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
                     && (svc->port == vport)
-                   && (svc->protocol == protocol)) {
+                   && (svc->protocol == protocol)
+                   && net_eq(svc->net, net)) {
                         /* HIT */
                         return svc;
                 }
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
   *     Get service by {fwmark} in the service table.
   */
  static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
  {
         unsigned hash;
         struct ip_vs_service *svc;
  
         /* Check for fwmark addressed entries */
-       hash = ip_vs_svc_fwm_hashkey(fwmark);
+       hash = ip_vs_svc_fwm_hashkey(net, fwmark);
  
         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-               if (svc->fwmark == fwmark && svc->af == af) {
+               if (svc->fwmark == fwmark && svc->af == af
+                   && net_eq(svc->net, net)) {
                         /* HIT */
                         return svc;
                 }
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
  }
  
  struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                   const union nf_inet_addr *vaddr, __be16 vport)
  {
         struct ip_vs_service *svc;
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
         read_lock(&__ip_vs_svc_lock);
  
         /*
          *      Check the table hashed by fwmark first
          */
-       if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+       svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+       if (fwmark && svc)
                 goto out;
  
         /*
          *      Check the table hashed by <protocol,addr,port>
          *      for "full" addressed entries
          */
-       svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+       svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
  
         if (svc == NULL
             && protocol == IPPROTO_TCP
-           && atomic_read(&ip_vs_ftpsvc_counter)
+           && atomic_read(&ipvs->ftpsvc_counter)
             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
                 /*
                  * Check if ftp service entry exists, the packet
                  * might belong to FTP data connections.
                  */
-               svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
         }
  
         if (svc == NULL
-           && atomic_read(&ip_vs_nullsvc_counter)) {
+           && atomic_read(&ipvs->nullsvc_counter)) {
                 /*
                  * Check if the catch-all port (port zero) exists
                  */
-               svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
         }
  
    out:
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
                               svc->fwmark,
                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
                               ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                 kfree(svc);
         }
  }
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
  }
  
  /*
- *     Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *     Hashes ip_vs_dest in rs_table by <proto,addr,port>.
   *     should be called with locked tables.
   */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
  {
         unsigned hash;
  
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
          */
         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
  
-       list_add(&dest->d_list, &ip_vs_rtable[hash]);
+       list_add(&dest->d_list, &ipvs->rs_table[hash]);
  
         return 1;
  }
  
  /*
- *     UNhashes ip_vs_dest from ip_vs_rtable.
+ *     UNhashes ip_vs_dest from rs_table.
   *     should be called with locked tables.
   */
  static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  {
         /*
-        * Remove it from the ip_vs_rtable table.
+        * Remove it from the rs_table table.
          */
         if (!list_empty(&dest->d_list)) {
                 list_del(&dest->d_list);
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
   *     Lookup real service by <proto,addr,port> in the real service table.
   */
  struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                           const union nf_inet_addr *daddr,
                           __be16 dport)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         unsigned hash;
         struct ip_vs_dest *dest;
  
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
          */
         hash = ip_vs_rs_hashkey(af, daddr, dport);
  
-       read_lock(&__ip_vs_rs_lock);
-       list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+       read_lock(&ipvs->rs_lock);
+       list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
                 if ((dest->af == af)
                     && ip_vs_addr_equal(af, &dest->addr, daddr)
                     && (dest->port == dport)
                     && ((dest->protocol == protocol) ||
                         dest->vfwmark)) {
                         /* HIT */
-                       read_unlock(&__ip_vs_rs_lock);
+                       read_unlock(&ipvs->rs_lock);
                         return dest;
                 }
         }
-       read_unlock(&__ip_vs_rs_lock);
+       read_unlock(&ipvs->rs_lock);
  
         return NULL;
  }
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
   * ip_vs_lookup_real_service() looked promissing, but
   * seems not working as expected.
   */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+                                  const union nf_inet_addr *daddr,
                                    __be16 dport,
                                    const union nf_inet_addr *vaddr,
-                                  __be16 vport, __u16 protocol)
+                                  __be16 vport, __u16 protocol, __u32 fwmark)
  {
         struct ip_vs_dest *dest;
         struct ip_vs_service *svc;
  
-       svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+       svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
         if (!svc)
                 return NULL;
         dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                      __be16 dport)
  {
         struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
  
         /*
          * Find the destination in trash
          */
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
                               "dest->refcnt=%d\n",
                               dest->vfwmark,
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                         list_del(&dest->n_list);
                         ip_vs_dst_reset(dest);
                         __ip_vs_unbind_svc(dest);
+                       free_percpu(dest->stats.cpustats);
                         kfree(dest);
                 }
         }
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
   *  are expired, and the refcnt of each destination in the trash must
   *  be 1, so we simply release them here.
   */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
  {
         struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                 list_del(&dest->n_list);
                 ip_vs_dst_reset(dest);
                 __ip_vs_unbind_svc(dest);
+               free_percpu(dest->stats.cpustats);
                 kfree(dest);
         }
  }
@@ -768,6 +728,7 @@ static void
  __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                     struct ip_vs_dest_user_kern *udest, int add)
  {
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
         int conn_flags;
  
         /* set the weight and the flags */
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
         } else {
                 /*
-                *    Put the real service in ip_vs_rtable if not present.
+                *    Put the real service in rs_table if not present.
                  *    For now only for NAT!
                  */
-               write_lock_bh(&__ip_vs_rs_lock);
-               ip_vs_rs_hash(dest);
-               write_unlock_bh(&__ip_vs_rs_lock);
+               write_lock_bh(&ipvs->rs_lock);
+               ip_vs_rs_hash(ipvs, dest);
+               write_unlock_bh(&ipvs->rs_lock);
         }
         atomic_set(&dest->conn_flags, conn_flags);
  
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
         spin_unlock(&dest->dst_lock);
  
         if (add)
-               ip_vs_new_estimator(&dest->stats);
+               ip_vs_new_estimator(svc->net, &dest->stats);
  
         write_lock_bh(&__ip_vs_svc_lock);
  
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                 atype = ipv6_addr_type(&udest->addr.in6);
                 if ((!(atype & IPV6_ADDR_UNICAST) ||
                         atype & IPV6_ADDR_LINKLOCAL) &&
-                       !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+                       !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
                         return -EINVAL;
         } else
  #endif
         {
-               atype = inet_addr_type(&init_net, udest->addr.ip);
+               atype = inet_addr_type(svc->net, udest->addr.ip);
                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
                         return -EINVAL;
         }
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                 pr_err("%s(): no memory.\n", __func__);
                 return -ENOMEM;
         }
+       dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!dest->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto err_alloc;
+       }
  
         dest->af = svc->af;
         dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
  
         LeaveFunction(2);
         return 0;
+
+err_alloc:
+       kfree(dest);
+       return -ENOMEM;
  }
  
  
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  /*
   *     Delete a destination (must be already unlinked from the service)
   */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
  {
-       ip_vs_kill_estimator(&dest->stats);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_kill_estimator(net, &dest->stats);
  
         /*
          *  Remove it from the d-linked list with the real services.
          */
-       write_lock_bh(&__ip_vs_rs_lock);
+       write_lock_bh(&ipvs->rs_lock);
         ip_vs_rs_unhash(dest);
-       write_unlock_bh(&__ip_vs_rs_lock);
+       write_unlock_bh(&ipvs->rs_lock);
  
         /*
          *  Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                    and only one user context can update virtual service at a
                    time, so the operation here is OK */
                 atomic_dec(&dest->svc->refcnt);
+               free_percpu(dest->stats.cpustats);
                 kfree(dest);
         } else {
                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
                               ntohs(dest->port),
                               atomic_read(&dest->refcnt));
-               list_add(&dest->n_list, &ip_vs_dest_trash);
+               list_add(&dest->n_list, &ipvs->dest_trash);
                 atomic_inc(&dest->refcnt);
         }
  }
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
         /*
          *      Delete the destination
          */
-       __ip_vs_del_dest(dest);
+       __ip_vs_del_dest(svc->net, dest);
  
         LeaveFunction(2);
  
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
   *     Add a service into the service hash table
   */
  static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
                   struct ip_vs_service **svc_p)
  {
         int ret = 0;
         struct ip_vs_scheduler *sched = NULL;
         struct ip_vs_pe *pe = NULL;
         struct ip_vs_service *svc = NULL;
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
         /* increase the module use count */
         ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
         }
  
         if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                 if (pe == NULL) {
                         pr_info("persistence engine module ip_vs_pe_%s "
                                 "not found\n", u->pe_name);
@@ -1159,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                 ret = -ENOMEM;
                 goto out_err;
         }
+       svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!svc->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto out_err;
+       }
  
         /* I'm the first user of the service */
         atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
         svc->flags = u->flags;
         svc->timeout = u->timeout * HZ;
         svc->netmask = u->netmask;
+       svc->net = net;
  
         INIT_LIST_HEAD(&svc->destinations);
         rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
  
         /* Update the virtual service counters */
         if (svc->port == FTPPORT)
-               atomic_inc(&ip_vs_ftpsvc_counter);
+               atomic_inc(&ipvs->ftpsvc_counter);
         else if (svc->port == 0)
-               atomic_inc(&ip_vs_nullsvc_counter);
+               atomic_inc(&ipvs->nullsvc_counter);
  
-       ip_vs_new_estimator(&svc->stats);
+       ip_vs_new_estimator(net, &svc->stats);
  
         /* Count only IPv4 services for old get/setsockopt interface */
         if (svc->af == AF_INET)
-               ip_vs_num_services++;
+               ipvs->num_services++;
  
         /* Hash the service into the service table */
         write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
         *svc_p = svc;
         return 0;
  
+
   out_err:
         if (svc != NULL) {
                 ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                         ip_vs_app_inc_put(svc->inc);
                         local_bh_enable();
                 }
+               if (svc->stats.cpustats)
+                       free_percpu(svc->stats.cpustats);
                 kfree(svc);
         }
         ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
         old_sched = sched;
  
         if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                 if (pe == NULL) {
                         pr_info("persistence engine module ip_vs_pe_%s "
                                 "not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
         struct ip_vs_dest *dest, *nxt;
         struct ip_vs_scheduler *old_sched;
         struct ip_vs_pe *old_pe;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
  
         pr_info("%s: enter\n", __func__);
  
         /* Count only IPv4 services for old get/setsockopt interface */
         if (svc->af == AF_INET)
-               ip_vs_num_services--;
+               ipvs->num_services--;
  
-       ip_vs_kill_estimator(&svc->stats);
+       ip_vs_kill_estimator(svc->net, &svc->stats);
  
         /* Unbind scheduler */
         old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
          */
         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
                 __ip_vs_unlink_dest(svc, dest, 0);
-               __ip_vs_del_dest(dest);
+               __ip_vs_del_dest(svc->net, dest);
         }
  
         /*
          *    Update the virtual service counters
          */
         if (svc->port == FTPPORT)
-               atomic_dec(&ip_vs_ftpsvc_counter);
+               atomic_dec(&ipvs->ftpsvc_counter);
         else if (svc->port == 0)
-               atomic_dec(&ip_vs_nullsvc_counter);
+               atomic_dec(&ipvs->nullsvc_counter);
  
         /*
          *    Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
                               svc->fwmark,
                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
                               ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                 kfree(svc);
         }
  
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
  /*
   *     Flush all the virtual services
   */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
  {
         int idx;
         struct ip_vs_service *svc, *nxt;
  
         /*
-        * Flush the service table hashed by <protocol,addr,port>
+        * Flush the service table hashed by <netns,protocol,addr,port>
          */
         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_unlink_service(svc);
+               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+                                        s_list) {
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                 }
         }
  
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void)
         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry_safe(svc, nxt,
                                          &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_unlink_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                 }
         }
  
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
         return 0;
  }
  
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
  {
         int idx;
         struct ip_vs_service *svc;
  
         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                 }
         }
  
         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                 }
         }
  
-       ip_vs_zero_stats(&ip_vs_stats);
+       ip_vs_zero_stats(net_ipvs(net)->tot_stats);
         return 0;
  }
  
@@ -1498,6 +1488,7 @@ static int
  proc_do_defense_mode(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
  {
+       struct net *net = current->nsproxy->net_ns;
         int *valp = table->data;
         int val = *valp;
         int rc;
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write,
                         /* Restore the correct value */
                         *valp = val;
                 } else {
-                       update_defense_level();
+                       update_defense_level(net_ipvs(net));
                 }
         }
         return rc;
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
         return rc;
  }
  
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int *valp = table->data;
+       int val = *valp;
+       int rc;
+
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (write && (*valp != val)) {
+               if ((*valp < 0) || (*valp > 1)) {
+                       /* Restore the correct value */
+                       *valp = val;
+               } else {
+                       struct net *net = current->nsproxy->net_ns;
+                       ip_vs_sync_switch_mode(net, val);
+               }
+       }
+       return rc;
+}
  
  /*
   *     IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *     Do not change the order or insert new entries without
+ *     aligning them with the netns init in __ip_vs_control_init()
   */
  
  static struct ctl_table vs_vars[] = {
         {
                 .procname       = "amemthresh",
-               .data           = &sysctl_ip_vs_amemthresh,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec,
         },
-#ifdef CONFIG_IP_VS_DEBUG
-       {
-               .procname       = "debug_level",
-               .data           = &sysctl_ip_vs_debug_level,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-#endif
         {
                 .procname       = "am_droprate",
-               .data           = &sysctl_ip_vs_am_droprate,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec,
         },
         {
                 .procname       = "drop_entry",
-               .data           = &sysctl_ip_vs_drop_entry,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_do_defense_mode,
         },
         {
                 .procname       = "drop_packet",
-               .data           = &sysctl_ip_vs_drop_packet,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_do_defense_mode,
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = {
  #ifdef CONFIG_IP_VS_NFCT
         {
                 .procname       = "conntrack",
-               .data           = &sysctl_ip_vs_conntrack,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = {
  #endif
         {
                 .procname       = "secure_tcp",
-               .data           = &sysctl_ip_vs_secure_tcp,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_do_defense_mode,
         },
         {
                 .procname       = "snat_reroute",
-               .data           = &sysctl_ip_vs_snat_reroute,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
+       {
+               .procname       = "sync_version",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_do_sync_mode,
+       },
+       {
+               .procname       = "cache_bypass",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_nodest_conn",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_quiescent_template",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sync_threshold",
+               .maxlen         =
+                       sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+               .mode           = 0644,
+               .proc_handler   = proc_do_sync_threshold,
+       },
+       {
+               .procname       = "nat_icmp_send",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#ifdef CONFIG_IP_VS_DEBUG
+       {
+               .procname       = "debug_level",
+               .data           = &sysctl_ip_vs_debug_level,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#endif
  #if 0
         {
                 .procname       = "timeout_established",
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = {
                 .proc_handler   = proc_dointvec_jiffies,
         },
  #endif
-       {
-               .procname       = "cache_bypass",
-               .data           = &sysctl_ip_vs_cache_bypass,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "expire_nodest_conn",
-               .data           = &sysctl_ip_vs_expire_nodest_conn,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "expire_quiescent_template",
-               .data           = &sysctl_ip_vs_expire_quiescent_template,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "sync_threshold",
-               .data           = &sysctl_ip_vs_sync_threshold,
-               .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
-               .mode           = 0644,
-               .proc_handler   = proc_do_sync_threshold,
-       },
-       {
-               .procname       = "nat_icmp_send",
-               .data           = &sysctl_ip_vs_nat_icmp_send,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
         { }
  };
  
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = {
  };
  EXPORT_SYMBOL_GPL(net_vs_ctl_path);
  
-static struct ctl_table_header * sysctl_header;
-
  #ifdef CONFIG_PROC_FS
  
  struct ip_vs_iter {
+       struct seq_net_private p;  /* Do not move this, netns depends upon it*/
         struct list_head *table;
         int bucket;
  };
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
  /* Get the Nth entry in the two lists */
  static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
  {
+       struct net *net = seq_file_net(seq);
         struct ip_vs_iter *iter = seq->private;
         int idx;
         struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
         /* look in hash by protocol */
         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       if (pos-- == 0){
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                 iter->table = ip_vs_svc_table;
                                 iter->bucket = idx;
                                 return svc;
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
         /* keep looking in fwmark */
         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       if (pos-- == 0) {
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                 iter->table = ip_vs_svc_fwm_table;
                                 iter->bucket = idx;
                                 return svc;
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
  
  static int ip_vs_info_open(struct inode *inode, struct file *file)
  {
-       return seq_open_private(file, &ip_vs_info_seq_ops,
+       return seq_open_net(inode, file, &ip_vs_info_seq_ops,
                         sizeof(struct ip_vs_iter));
  }
  
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = {
  
  #endif
  
-struct ip_vs_stats ip_vs_stats = {
-       .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
  #ifdef CONFIG_PROC_FS
  static int ip_vs_stats_show(struct seq_file *seq, void *v)
  {
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
  
  /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
         seq_puts(seq,
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
         seq_printf(seq,
                    "   Conns  Packets  Packets            Bytes            Bytes\n");
  
-       spin_lock_bh(&ip_vs_stats.lock);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-                  ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-                  (unsigned long long) ip_vs_stats.ustats.inbytes,
-                  (unsigned long long) ip_vs_stats.ustats.outbytes);
+       spin_lock_bh(&tot_stats->lock);
+       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
+                  tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
  
  /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
         seq_puts(seq,
                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-                       ip_vs_stats.ustats.cps,
-                       ip_vs_stats.ustats.inpps,
-                       ip_vs_stats.ustats.outpps,
-                       ip_vs_stats.ustats.inbps,
-                       ip_vs_stats.ustats.outbps);
-       spin_unlock_bh(&ip_vs_stats.lock);
+                       tot_stats->ustats.cps,
+                       tot_stats->ustats.inpps,
+                       tot_stats->ustats.outpps,
+                       tot_stats->ustats.inbps,
+                       tot_stats->ustats.outbps);
+       spin_unlock_bh(&tot_stats->lock);
  
         return 0;
  }
  
  static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
  {
-       return single_open(file, ip_vs_stats_show, NULL);
+       return single_open_net(inode, file, ip_vs_stats_show);
  }
  
  static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = {
         .release = single_release,
  };
  
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+       int i;
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                "       Total Incoming Outgoing         Incoming         Outgoing\n");
+       seq_printf(seq,
+                  "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
+
+       for_each_possible_cpu(i) {
+               struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+               seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+                           i, u->ustats.conns, u->ustats.inpkts,
+                           u->ustats.outpkts, (__u64)u->ustats.inbytes,
+                           (__u64)u->ustats.outbytes);
+       }
+
+       spin_lock_bh(&tot_stats->lock);
+       seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
+                  tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+                  tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+       seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
+                       tot_stats->ustats.cps,
+                       tot_stats->ustats.inpps,
+                       tot_stats->ustats.outpps,
+                       tot_stats->ustats.inbps,
+                       tot_stats->ustats.outbps);
+       spin_unlock_bh(&tot_stats->lock);
+
+       return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+       return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+       .owner = THIS_MODULE,
+       .open = ip_vs_stats_percpu_seq_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
  #endif
  
  /*
   *     Set timeout values for tcp tcpfin udp in the timeout_table.
   */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
  {
+       struct ip_vs_proto_data *pd;
+
         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
                   u->tcp_timeout,
                   u->tcp_fin_timeout,
@@ -2010,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
  
  #ifdef CONFIG_IP_VS_PROTO_TCP
         if (u->tcp_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
                         = u->tcp_timeout * HZ;
         }
  
         if (u->tcp_fin_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
                         = u->tcp_fin_timeout * HZ;
         }
  #endif
  
  #ifdef CONFIG_IP_VS_PROTO_UDP
         if (u->udp_timeout) {
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+               pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+               pd->timeout_table[IP_VS_UDP_S_NORMAL]
                         = u->udp_timeout * HZ;
         }
  #endif
@@ -2087,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
  static int
  do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
  {
+       struct net *net = sock_net(sk);
         int ret;
         unsigned char arg[MAX_ARG_LEN];
         struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
  
         if (cmd == IP_VS_SO_SET_FLUSH) {
                 /* Flush the virtual service */
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                 goto out_unlock;
         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
                 /* Set timeout values for (tcp tcpfin udp) */
-               ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+               ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
                 goto out_unlock;
         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+               ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+                                       dm->syncid);
                 goto out_unlock;
         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = stop_sync_thread(dm->state);
+               ret = stop_sync_thread(net, dm->state);
                 goto out_unlock;
         }
  
@@ -2148,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
         if (cmd == IP_VS_SO_SET_ZERO) {
                 /* if no service address is set, zero counters in all */
                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-                       ret = ip_vs_zero_all();
+                       ret = ip_vs_zero_all(net);
                         goto out_unlock;
                 }
         }
@@ -2165,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
  
         /* Lookup the exact service by <protocol, addr, port> or fwmark */
         if (usvc.fwmark == 0)
-               svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+               svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
                                            &usvc.addr, usvc.port);
         else
-               svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+               svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
  
         if (cmd != IP_VS_SO_SET_ADD
             && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                 if (svc != NULL)
                         ret = -EEXIST;
                 else
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                 break;
         case IP_VS_SO_SET_EDIT:
                 ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
  }
  
  static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+                           const struct ip_vs_get_services *get,
                             struct ip_vs_get_services __user *uptr)
  {
         int idx, count=0;
@@ -2252,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
                         /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                 continue;
  
                         if (count >= get->num_services)
@@ -2271,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
                         /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                 continue;
  
                         if (count >= get->num_services)
@@ -2291,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
  }
  
  static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                          struct ip_vs_get_dests __user *uptr)
  {
         struct ip_vs_service *svc;
@@ -2299,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
         int ret = 0;
  
         if (get->fwmark)
-               svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+               svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
         else
-               svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+               svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
                                            get->port);
  
         if (svc) {
@@ -2336,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
  }
  
  static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
  {
+       struct ip_vs_proto_data *pd;
+
  #ifdef CONFIG_IP_VS_PROTO_TCP
-       u->tcp_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-       u->tcp_fin_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+       pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+       u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+       u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
  #endif
  #ifdef CONFIG_IP_VS_PROTO_UDP
+       pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
         u->udp_timeout =
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+                       pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
  #endif
  }
  
@@ -2375,7 +2444,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
         unsigned char arg[128];
         int ret = 0;
         unsigned int copylen;
+       struct net *net = sock_net(sk);
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
+       BUG_ON(!net);
         if (!capable(CAP_NET_ADMIN))
                 return -EPERM;
  
@@ -2418,7 +2490,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                 struct ip_vs_getinfo info;
                 info.version = IP_VS_VERSION_CODE;
                 info.size = ip_vs_conn_tab_size;
-               info.num_services = ip_vs_num_services;
+               info.num_services = ipvs->num_services;
                 if (copy_to_user(user, &info, sizeof(info)) != 0)
                         ret = -EFAULT;
         }
@@ -2437,7 +2509,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                         ret = -EINVAL;
                         goto out;
                 }
-               ret = __ip_vs_get_service_entries(get, user);
+               ret = __ip_vs_get_service_entries(net, get, user);
         }
         break;
  
@@ -2450,10 +2522,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                 entry = (struct ip_vs_service_entry *)arg;
                 addr.ip = entry->addr;
                 if (entry->fwmark)
-                       svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+                       svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
                 else
-                       svc = __ip_vs_service_find(AF_INET, entry->protocol,
-                                                  &addr, entry->port);
+                       svc = __ip_vs_service_find(net, AF_INET,
+                                                  entry->protocol, &addr,
+                                                  entry->port);
                 if (svc) {
                         ip_vs_copy_service(entry, svc);
                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2549,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                         ret = -EINVAL;
                         goto out;
                 }
-               ret = __ip_vs_get_dest_entries(get, user);
+               ret = __ip_vs_get_dest_entries(net, get, user);
         }
         break;
  
@@ -2484,7 +2557,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
         {
                 struct ip_vs_timeout_user t;
  
-               __ip_vs_get_timeouts(&t);
+               __ip_vs_get_timeouts(net, &t);
                 if (copy_to_user(user, &t, sizeof(t)) != 0)
                         ret = -EFAULT;
         }
@@ -2495,15 +2568,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                 struct ip_vs_daemon_user d[2];
  
                 memset(&d, 0, sizeof(d));
-               if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+               if (ipvs->sync_state & IP_VS_STATE_MASTER) {
                         d[0].state = IP_VS_STATE_MASTER;
-                       strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-                       d[0].syncid = ip_vs_master_syncid;
+                       strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+                               sizeof(d[0].mcast_ifn));
+                       d[0].syncid = ipvs->master_syncid;
                 }
-               if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+               if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
                         d[1].state = IP_VS_STATE_BACKUP;
-                       strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-                       d[1].syncid = ip_vs_backup_syncid;
+                       strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+                               sizeof(d[1].mcast_ifn));
+                       d[1].syncid = ipvs->backup_syncid;
                 }
                 if (copy_to_user(user, &d, sizeof(d)) != 0)
                         ret = -EFAULT;
@@ -2542,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = {
         .name           = IPVS_GENL_NAME,
         .version        = IPVS_GENL_VERSION,
         .maxattr        = IPVS_CMD_MAX,
+       .netnsok        = true,         /* Let ipvsadm work on netns */
  };
  
  /* Policy used for first-level command attributes */
@@ -2696,11 +2772,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
         int idx = 0, i;
         int start = cb->args[0];
         struct ip_vs_service *svc;
+       struct net *net = skb_sknet(skb);
  
         mutex_lock(&__ip_vs_mutex);
         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-                       if (++idx <= start)
+                       if (++idx <= start || !net_eq(svc->net, net))
                                 continue;
                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                                 idx--;
@@ -2711,7 +2788,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
  
         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-                       if (++idx <= start)
+                       if (++idx <= start || !net_eq(svc->net, net))
                                 continue;
                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                                 idx--;
@@ -2727,7 +2804,8 @@ nla_put_failure:
         return skb->len;
  }
  
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+                                   struct ip_vs_service_user_kern *usvc,
                                     struct nlattr *nla, int full_entry,
                                     struct ip_vs_service **ret_svc)
  {
@@ -2770,9 +2848,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
         }
  
         if (usvc->fwmark)
-               svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+               svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
         else
-               svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+               svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
                                            &usvc->addr, usvc->port);
         *ret_svc = svc;
  
@@ -2809,13 +2887,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
         return 0;
  }
  
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+                                                    struct nlattr *nla)
  {
         struct ip_vs_service_user_kern usvc;
         struct ip_vs_service *svc;
         int ret;
  
-       ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+       ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
         return ret ? ERR_PTR(ret) : svc;
  }
  
@@ -2883,6 +2962,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
         struct ip_vs_service *svc;
         struct ip_vs_dest *dest;
         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+       struct net *net = skb_sknet(skb);
  
         mutex_lock(&__ip_vs_mutex);
  
@@ -2891,7 +2971,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
                 goto out_err;
  
-       svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+       svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
         if (IS_ERR(svc) || svc == NULL)
                 goto out_err;
  
@@ -3005,20 +3086,23 @@ nla_put_failure:
  static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
                                    struct netlink_callback *cb)
  {
+       struct net *net = skb_net(skb);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
         mutex_lock(&__ip_vs_mutex);
-       if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+       if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-                                          ip_vs_master_mcast_ifn,
-                                          ip_vs_master_syncid, cb) < 0)
+                                          ipvs->master_mcast_ifn,
+                                          ipvs->master_syncid, cb) < 0)
                         goto nla_put_failure;
  
                 cb->args[0] = 1;
         }
  
-       if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+       if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-                                          ip_vs_backup_mcast_ifn,
-                                          ip_vs_backup_syncid, cb) < 0)
+                                          ipvs->backup_mcast_ifn,
+                                          ipvs->backup_syncid, cb) < 0)
                         goto nla_put_failure;
  
                 cb->args[1] = 1;
@@ -3030,31 +3114,33 @@ nla_put_failure:
         return skb->len;
  }
  
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
  {
         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
                 return -EINVAL;
  
-       return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+       return start_sync_thread(net,
+                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
  }
  
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
  {
         if (!attrs[IPVS_DAEMON_ATTR_STATE])
                 return -EINVAL;
  
-       return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+       return stop_sync_thread(net,
+                               nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
  }
  
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
  {
         struct ip_vs_timeout_user t;
  
-       __ip_vs_get_timeouts(&t);
+       __ip_vs_get_timeouts(net, &t);
  
         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3152,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
  
-       return ip_vs_set_timeout(&t);
+       return ip_vs_set_timeout(net, &t);
  }
  
  static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3162,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
         struct ip_vs_dest_user_kern udest;
         int ret = 0, cmd;
         int need_full_svc = 0, need_full_dest = 0;
+       struct net *net;
+       struct netns_ipvs *ipvs;
  
+       net = skb_sknet(skb);
+       ipvs = net_ipvs(net);
         cmd = info->genlhdr->cmd;
  
         mutex_lock(&__ip_vs_mutex);
  
         if (cmd == IPVS_CMD_FLUSH) {
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                 goto out;
         } else if (cmd == IPVS_CMD_SET_CONFIG) {
-               ret = ip_vs_genl_set_config(info->attrs);
+               ret = ip_vs_genl_set_config(net, info->attrs);
                 goto out;
         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
                    cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3191,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
                 }
  
                 if (cmd == IPVS_CMD_NEW_DAEMON)
-                       ret = ip_vs_genl_new_daemon(daemon_attrs);
+                       ret = ip_vs_genl_new_daemon(net, daemon_attrs);
                 else
-                       ret = ip_vs_genl_del_daemon(daemon_attrs);
+                       ret = ip_vs_genl_del_daemon(net, daemon_attrs);
                 goto out;
         } else if (cmd == IPVS_CMD_ZERO &&
                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-               ret = ip_vs_zero_all();
+               ret = ip_vs_zero_all(net);
                 goto out;
         }
  
@@ -3117,7 +3207,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
                 need_full_svc = 1;
  
-       ret = ip_vs_genl_parse_service(&usvc,
+       ret = ip_vs_genl_parse_service(net, &usvc,
                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
                                        need_full_svc, &svc);
         if (ret)
@@ -3147,7 +3237,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
         switch (cmd) {
         case IPVS_CMD_NEW_SERVICE:
                 if (svc == NULL)
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                 else
                         ret = -EEXIST;
                 break;
@@ -3185,7 +3275,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
         struct sk_buff *msg;
         void *reply;
         int ret, cmd, reply_cmd;
+       struct net *net;
+       struct netns_ipvs *ipvs;
  
+       net = skb_sknet(skb);
+       ipvs = net_ipvs(net);
         cmd = info->genlhdr->cmd;
  
         if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3308,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
         {
                 struct ip_vs_service *svc;
  
-               svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+               svc = ip_vs_genl_find_service(net,
+                                             info->attrs[IPVS_CMD_ATTR_SERVICE]);
                 if (IS_ERR(svc)) {
                         ret = PTR_ERR(svc);
                         goto out_err;
@@ -3234,7 +3329,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
         {
                 struct ip_vs_timeout_user t;
  
-               __ip_vs_get_timeouts(&t);
+               __ip_vs_get_timeouts(net, &t);
  #ifdef CONFIG_IP_VS_PROTO_TCP
                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3475,172 @@ static void ip_vs_genl_unregister(void)
  
  /* End of Generic Netlink interface definitions */
  
+/*
+ * per netns init/exit func.
+ */
+int __net_init __ip_vs_control_init(struct net *net)
+{
+       int idx;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ctl_table *tbl;
+
+       atomic_set(&ipvs->dropentry, 0);
+       spin_lock_init(&ipvs->dropentry_lock);
+       spin_lock_init(&ipvs->droppacket_lock);
+       spin_lock_init(&ipvs->securetcp_lock);
+       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+       /* Initialize rs_table */
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+       INIT_LIST_HEAD(&ipvs->dest_trash);
+       atomic_set(&ipvs->ftpsvc_counter, 0);
+       atomic_set(&ipvs->nullsvc_counter, 0);
+
+       /* procfs stats */
+       ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+       if (ipvs->tot_stats == NULL) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!ipvs->cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto err_alloc;
+       }
+       spin_lock_init(&ipvs->tot_stats->lock);
+
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+                            &ip_vs_stats_percpu_fops);
+
+       if (!net_eq(net, &init_net)) {
+               tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+               if (tbl == NULL)
+                       goto err_dup;
+       } else
+               tbl = vs_vars;
+       /* Initialize sysctl defaults */
+       idx = 0;
+       ipvs->sysctl_amemthresh = 1024;
+       tbl[idx++].data = &ipvs->sysctl_amemthresh;
+       ipvs->sysctl_am_droprate = 10;
+       tbl[idx++].data = &ipvs->sysctl_am_droprate;
+       tbl[idx++].data = &ipvs->sysctl_drop_entry;
+       tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+       tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+       tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+       ipvs->sysctl_snat_reroute = 1;
+       tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+       ipvs->sysctl_sync_ver = 1;
+       tbl[idx++].data = &ipvs->sysctl_sync_ver;
+       tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+       tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+       tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+       ipvs->sysctl_sync_threshold[0] = 3;
+       ipvs->sysctl_sync_threshold[1] = 50;
+       tbl[idx].data = &ipvs->sysctl_sync_threshold;
+       tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+       tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+       ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+                                                 vs_vars);
+       if (ipvs->sysctl_hdr == NULL)
+               goto err_reg;
+       ip_vs_new_estimator(net, ipvs->tot_stats);
+       ipvs->sysctl_tbl = tbl;
+       /* Schedule defense work */
+       INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+       return 0;
+
+err_reg:
+       if (!net_eq(net, &init_net))
+               kfree(tbl);
+err_dup:
+       free_percpu(ipvs->cpustats);
+err_alloc:
+       kfree(ipvs->tot_stats);
+       return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_trash_cleanup(net);
+       ip_vs_kill_estimator(net, ipvs->tot_stats);
+       cancel_delayed_work_sync(&ipvs->defense_work);
+       cancel_work_sync(&ipvs->defense_work.work);
+       unregister_net_sysctl_table(ipvs->sysctl_hdr);
+       proc_net_remove(net, "ip_vs_stats_percpu");
+       proc_net_remove(net, "ip_vs_stats");
+       proc_net_remove(net, "ip_vs");
+       free_percpu(ipvs->cpustats);
+       kfree(ipvs->tot_stats);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+       .init = __ip_vs_control_init,
+       .exit = __ip_vs_control_cleanup,
+};
  
  int __init ip_vs_control_init(void)
  {
-       int ret;
         int idx;
+       int ret;
  
         EnterFunction(2);
  
-       /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+       /* Initialize ip_vs_svc_table and ip_vs_svc_fwm_table */
         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
         }
-       for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-               INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+       ret = register_pernet_subsys(&ipvs_control_ops);
+       if (ret) {
+               pr_err("cannot register namespace.\n");
+               goto err;
         }
-       smp_wmb();
+
+       smp_wmb();      /* Do we really need it now ? */
  
         ret = nf_register_sockopt(&ip_vs_sockopts);
         if (ret) {
                 pr_err("cannot register sockopt.\n");
-               return ret;
+               goto err_net;
         }
  
         ret = ip_vs_genl_register();
         if (ret) {
                 pr_err("cannot register Generic Netlink interface.\n");
                 nf_unregister_sockopt(&ip_vs_sockopts);
-               return ret;
+               goto err_net;
         }
  
-       proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-       proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-       ip_vs_new_estimator(&ip_vs_stats);
-
-       /* Hook the defense timer */
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
         LeaveFunction(2);
         return 0;
+
+err_net:
+       unregister_pernet_subsys(&ipvs_control_ops);
+err:
+       return ret;
  }
  
  
  void ip_vs_control_cleanup(void)
  {
         EnterFunction(2);
-       ip_vs_trash_cleanup();
-       cancel_delayed_work_sync(&defense_work);
-       cancel_work_sync(&defense_work.work);
-       ip_vs_kill_estimator(&ip_vs_stats);
-       unregister_sysctl_table(sysctl_header);
-       proc_net_remove(&init_net, "ip_vs_stats");
-       proc_net_remove(&init_net, "ip_vs");
+       unregister_pernet_subsys(&ipvs_control_ops);
         ip_vs_genl_unregister();
         nf_unregister_sockopt(&ip_vs_sockopts);
         LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c

index ff28801..f560a05 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -8,8 +8,12 @@
   *              as published by the Free Software Foundation; either version
   *              2 of the License, or (at your option) any later version.
   *
- * Changes:
- *
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
+ *              Global data moved to netns, i.e. struct netns_ipvs
+ *              Affected data: est_list and est_lock.
+ *              estimation_timer() runs with timer per netns.
+ *              get_stats() does the per-cpu summing.
   */
  
  #define KMSG_COMPONENT "IPVS"
@@ -48,11 +52,42 @@
   */
  
  
-static void estimation_timer(unsigned long arg);
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+                                struct ip_vs_cpu_stats *stats)
+{
+       int i;
+
+       for_each_possible_cpu(i) {
+               struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+               unsigned int start;
+               __u64 inbytes, outbytes;
+               if (i) {
+                       sum->conns += s->ustats.conns;
+                       sum->inpkts += s->ustats.inpkts;
+                       sum->outpkts += s->ustats.outpkts;
+                       do {
+                               start = u64_stats_fetch_begin_bh(&s->syncp);
+                               inbytes = s->ustats.inbytes;
+                               outbytes = s->ustats.outbytes;
+                       } while (u64_stats_fetch_retry_bh(&s->syncp, start));
+                       sum->inbytes += inbytes;
+                       sum->outbytes += outbytes;
+               } else {
+                       sum->conns = s->ustats.conns;
+                       sum->inpkts = s->ustats.inpkts;
+                       sum->outpkts = s->ustats.outpkts;
+                       do {
+                               start = u64_stats_fetch_begin_bh(&s->syncp);
+                               sum->inbytes = s->ustats.inbytes;
+                               sum->outbytes = s->ustats.outbytes;
+                       } while (u64_stats_fetch_retry_bh(&s->syncp, start));
+               }
+       }
+}
  
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
  
  static void estimation_timer(unsigned long arg)
  {
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg)
         u32 n_inpkts, n_outpkts;
         u64 n_inbytes, n_outbytes;
         u32 rate;
+       struct net *net = (struct net *)arg;
+       struct netns_ipvs *ipvs;
  
-       spin_lock(&est_lock);
-       list_for_each_entry(e, &est_list, list) {
+       ipvs = net_ipvs(net);
+       ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
+       spin_lock(&ipvs->est_lock);
+       list_for_each_entry(e, &ipvs->est_list, list) {
                 s = container_of(e, struct ip_vs_stats, est);
  
+               ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
                 spin_lock(&s->lock);
                 n_conns = s->ustats.conns;
                 n_inpkts = s->ustats.inpkts;
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg)
                 n_outbytes = s->ustats.outbytes;
  
                 /* scaled by 2^10, but divided 2 seconds */
-               rate = (n_conns - e->last_conns)<<9;
+               rate = (n_conns - e->last_conns) << 9;
                 e->last_conns = n_conns;
-               e->cps += ((long)rate - (long)e->cps)>>2;
-               s->ustats.cps = (e->cps+0x1FF)>>10;
+               e->cps += ((long)rate - (long)e->cps) >> 2;
+               s->ustats.cps = (e->cps + 0x1FF) >> 10;
  
-               rate = (n_inpkts - e->last_inpkts)<<9;
+               rate = (n_inpkts - e->last_inpkts) << 9;
                 e->last_inpkts = n_inpkts;
-               e->inpps += ((long)rate - (long)e->inpps)>>2;
-               s->ustats.inpps = (e->inpps+0x1FF)>>10;
+               e->inpps += ((long)rate - (long)e->inpps) >> 2;
+               s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
  
-               rate = (n_outpkts - e->last_outpkts)<<9;
+               rate = (n_outpkts - e->last_outpkts) << 9;
                 e->last_outpkts = n_outpkts;
-               e->outpps += ((long)rate - (long)e->outpps)>>2;
-               s->ustats.outpps = (e->outpps+0x1FF)>>10;
+               e->outpps += ((long)rate - (long)e->outpps) >> 2;
+               s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
  
-               rate = (n_inbytes - e->last_inbytes)<<4;
+               rate = (n_inbytes - e->last_inbytes) << 4;
                 e->last_inbytes = n_inbytes;
-               e->inbps += ((long)rate - (long)e->inbps)>>2;
-               s->ustats.inbps = (e->inbps+0xF)>>5;
+               e->inbps += ((long)rate - (long)e->inbps) >> 2;
+               s->ustats.inbps = (e->inbps + 0xF) >> 5;
  
-               rate = (n_outbytes - e->last_outbytes)<<4;
+               rate = (n_outbytes - e->last_outbytes) << 4;
                 e->last_outbytes = n_outbytes;
-               e->outbps += ((long)rate - (long)e->outbps)>>2;
-               s->ustats.outbps = (e->outbps+0xF)>>5;
+               e->outbps += ((long)rate - (long)e->outbps) >> 2;
+               s->ustats.outbps = (e->outbps + 0xF) >> 5;
                 spin_unlock(&s->lock);
         }
-       spin_unlock(&est_lock);
-       mod_timer(&est_timer, jiffies + 2*HZ);
+       spin_unlock(&ipvs->est_lock);
+       mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
  }
  
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         struct ip_vs_estimator *est = &stats->est;
  
         INIT_LIST_HEAD(&est->list);
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
         est->last_outbytes = stats->ustats.outbytes;
         est->outbps = stats->ustats.outbps<<5;
  
-       spin_lock_bh(&est_lock);
-       list_add(&est->list, &est_list);
-       spin_unlock_bh(&est_lock);
+       spin_lock_bh(&ipvs->est_lock);
+       list_add(&est->list, &ipvs->est_list);
+       spin_unlock_bh(&ipvs->est_lock);
  }
  
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         struct ip_vs_estimator *est = &stats->est;
  
-       spin_lock_bh(&est_lock);
+       spin_lock_bh(&ipvs->est_lock);
         list_del(&est->list);
-       spin_unlock_bh(&est_lock);
+       spin_unlock_bh(&ipvs->est_lock);
  }
  
  void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
         est->outbps = 0;
  }
  
-int __init ip_vs_estimator_init(void)
+static int __net_init __ip_vs_estimator_init(struct net *net)
  {
-       mod_timer(&est_timer, jiffies + 2 * HZ);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->est_list);
+       spin_lock_init(&ipvs->est_lock);
+       setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+       mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
         return 0;
  }
  
+static void __net_exit __ip_vs_estimator_exit(struct net *net)
+{
+       del_timer_sync(&net_ipvs(net)->est_timer);
+}
+static struct pernet_operations ip_vs_app_ops = {
+       .init = __ip_vs_estimator_init,
+       .exit = __ip_vs_estimator_exit,
+};
+
+int __init ip_vs_estimator_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_app_ops);
+       return rv;
+}
+
  void ip_vs_estimator_cleanup(void)
  {
-       del_timer_sync(&est_timer);
+       unregister_pernet_subsys(&ip_vs_app_ops);
  }
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c

index 7545500..6b5dd6d 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
         int ret = 0;
         enum ip_conntrack_info ctinfo;
         struct nf_conn *ct;
+       struct net *net;
  
  #ifdef CONFIG_IP_VS_IPV6
         /* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                  */
                 {
                         struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(AF_INET, iph->protocol,
-                                             &from, port, &cp->caddr, 0, &p);
+                       ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+                                             iph->protocol, &from, port,
+                                             &cp->caddr, 0, &p);
                         n_cp = ip_vs_conn_out_get(&p);
                 }
                 if (!n_cp) {
                         struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+                       ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+                                             AF_INET, IPPROTO_TCP, &cp->caddr,
                                               0, &cp->vaddr, port, &p);
                         n_cp = ip_vs_conn_new(&p, &from, port,
                                               IP_VS_CONN_F_NO_CPORT |
                                               IP_VS_CONN_F_NFCT,
-                                             cp->dest);
+                                             cp->dest, skb->mark);
                         if (!n_cp)
                                 return 0;
  
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                  * would be adjusted twice.
                  */
  
+               net = skb_net(skb);
                 cp->app_data = NULL;
-               ip_vs_tcp_conn_listen(n_cp);
+               ip_vs_tcp_conn_listen(net, n_cp);
                 ip_vs_conn_put(n_cp);
                 return ret;
         }
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
         union nf_inet_addr to;
         __be16 port;
         struct ip_vs_conn *n_cp;
+       struct net *net;
  
  #ifdef CONFIG_IP_VS_IPV6
         /* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
  
         {
                 struct ip_vs_conn_param p;
-               ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
-                                     &cp->vaddr, htons(ntohs(cp->vport)-1),
-                                     &p);
+               ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+                                     iph->protocol, &to, port, &cp->vaddr,
+                                     htons(ntohs(cp->vport)-1), &p);
                 n_cp = ip_vs_conn_in_get(&p);
                 if (!n_cp) {
                         n_cp = ip_vs_conn_new(&p, &cp->daddr,
                                               htons(ntohs(cp->dport)-1),
-                                             IP_VS_CONN_F_NFCT, cp->dest);
+                                             IP_VS_CONN_F_NFCT, cp->dest,
+                                             skb->mark);
                         if (!n_cp)
                                 return 0;
  
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
         /*
          *      Move tunnel to listen state
          */
-       ip_vs_tcp_conn_listen(n_cp);
+       net = skb_net(skb);
+       ip_vs_tcp_conn_listen(net, n_cp);
         ip_vs_conn_put(n_cp);
  
         return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
         .pkt_in =       ip_vs_ftp_in,
  };
  
-
  /*
- *     ip_vs_ftp initialization
+ *     per netns ip_vs_ftp initialization
   */
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
  {
         int i, ret;
         struct ip_vs_app *app = &ip_vs_ftp;
  
-       ret = register_ip_vs_app(app);
+       ret = register_ip_vs_app(net, app);
         if (ret)
                 return ret;
  
         for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
                 if (!ports[i])
                         continue;
-               ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+               ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
                 if (ret)
                         break;
                 pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
         }
  
         if (ret)
-               unregister_ip_vs_app(app);
+               unregister_ip_vs_app(net, app);
  
         return ret;
  }
+/*
+ *     netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+       struct ip_vs_app *app = &ip_vs_ftp;
+
+       unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+       .init = __ip_vs_ftp_init,
+       .exit = __ip_vs_ftp_exit,
+};
  
+int __init ip_vs_ftp_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_ftp_ops);
+       return rv;
+}
  
  /*
   *     ip_vs_ftp finish.
   */
  static void __exit ip_vs_ftp_exit(void)
  {
-       unregister_ip_vs_app(&ip_vs_ftp);
+       unregister_pernet_subsys(&ip_vs_ftp_ops);
  }
  
  
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c

index 9323f89..d5bec33 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
   *    entries that haven't been touched for a day.
   */
  #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
  
  
  /*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
  static ctl_table vs_vars_table[] = {
         {
                 .procname       = "lblc_expiration",
-               .data           = &sysctl_ip_vs_lblc_expiration,
+               .data           = NULL,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
         { }
  };
  
-static struct ctl_table_header * sysctl_header;
-
  static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
  {
         list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
         struct ip_vs_lblc_entry *en, *nxt;
         unsigned long now = jiffies;
         int i, j;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
  
         for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
                 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
                 write_lock(&svc->sched_lock);
                 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                         if (time_before(now,
-                                       en->lastuse + sysctl_ip_vs_lblc_expiration))
+                                       en->lastuse +
+                                       ipvs->sysctl_lblc_expiration))
                                 continue;
  
                         ip_vs_lblc_free(en);
@@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
         .schedule =             ip_vs_lblc_schedule,
  };
  
+/*
+ *  per netns init.
+ */
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       if (!net_eq(net, &init_net)) {
+               ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+                                               sizeof(vs_vars_table),
+                                               GFP_KERNEL);
+               if (ipvs->lblc_ctl_table == NULL)
+                       goto err_dup;
+       } else
+               ipvs->lblc_ctl_table = vs_vars_table;
+       ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+       ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+       ipvs->lblc_ctl_header =
+               register_net_sysctl_table(net, net_vs_ctl_path,
+                                         ipvs->lblc_ctl_table);
+       if (!ipvs->lblc_ctl_header)
+               goto err_reg;
+
+       return 0;
+
+err_reg:
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->lblc_ctl_table);
+
+err_dup:
+       return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->lblc_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblc_ops = {
+       .init = __ip_vs_lblc_init,
+       .exit = __ip_vs_lblc_exit,
+};
  
  static int __init ip_vs_lblc_init(void)
  {
         int ret;
  
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+       ret = register_pernet_subsys(&ip_vs_lblc_ops);
+       if (ret)
+               return ret;
+
         ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
         if (ret)
-               unregister_sysctl_table(sysctl_header);
+               unregister_pernet_subsys(&ip_vs_lblc_ops);
         return ret;
  }
  
-
  static void __exit ip_vs_lblc_cleanup(void)
  {
-       unregister_sysctl_table(sysctl_header);
         unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+       unregister_pernet_subsys(&ip_vs_lblc_ops);
  }
  
  
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c

index dbeed8e..61ae8cf 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -70,8 +70,6 @@
   *    entries that haven't been touched for a day.
   */
  #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
  
  /*
   *     for IPVS lblcr entry hash table
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table {
  static ctl_table vs_vars_table[] = {
         {
                 .procname       = "lblcr_expiration",
-               .data           = &sysctl_ip_vs_lblcr_expiration,
+               .data           = NULL,
                 .maxlen         = sizeof(int),
                 .mode           = 0644,
                 .proc_handler   = proc_dointvec_jiffies,
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = {
         { }
  };
  
-static struct ctl_table_header * sysctl_header;
-
  static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
  {
         list_del(&en->list);
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
         unsigned long now = jiffies;
         int i, j;
         struct ip_vs_lblcr_entry *en, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
  
         for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
  
                 write_lock(&svc->sched_lock);
                 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-                       if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
-                                      now))
+                       if (time_after(en->lastuse
+                                       + ipvs->sysctl_lblcr_expiration, now))
                                 continue;
  
                         ip_vs_lblcr_free(en);
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
         read_lock(&svc->sched_lock);
         en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
         if (en) {
+               struct netns_ipvs *ipvs = net_ipvs(svc->net);
                 /* We only hold a read lock, but this is atomic */
                 en->lastuse = jiffies;
  
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                 /* More than one destination + enough time passed by, cleanup */
                 if (atomic_read(&en->set.size) > 1 &&
                                 time_after(jiffies, en->set.lastmod +
-                               sysctl_ip_vs_lblcr_expiration)) {
+                               ipvs->sysctl_lblcr_expiration)) {
                         struct ip_vs_dest *m;
  
                         write_lock(&en->set.lock);
@@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
         .schedule =             ip_vs_lblcr_schedule,
  };
  
+/*
+ *  Per-netns init: register the lblcr_expiration sysctl; 0 or -ENOMEM.
+ */
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       if (!net_eq(net, &init_net)) {  /* other netns: private table copy */
+               ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+                                               sizeof(vs_vars_table),
+                                               GFP_KERNEL);
+               if (ipvs->lblcr_ctl_table == NULL)
+                       goto err_dup;
+       } else                          /* init_net: use the static table */
+               ipvs->lblcr_ctl_table = vs_vars_table;
+       ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;    /* 24 hours */
+       ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+       ipvs->lblcr_ctl_header =
+               register_net_sysctl_table(net, net_vs_ctl_path,
+                                         ipvs->lblcr_ctl_table);
+       if (!ipvs->lblcr_ctl_header)
+               goto err_reg;
+
+       return 0;
+
+err_reg:
+       if (!net_eq(net, &init_net))    /* only the kmemdup() copy is freed */
+               kfree(ipvs->lblcr_ctl_table);
+
+err_dup:
+       return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)     /* undo init */
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+
+       if (!net_eq(net, &init_net))    /* init_net's table is static */
+               kfree(ipvs->lblcr_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+       .init = __ip_vs_lblcr_init,
+       .exit = __ip_vs_lblcr_exit,
+};
  
  static int __init ip_vs_lblcr_init(void)
  {
         int ret;
  
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+       ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+       if (ret)
+               return ret;
+
         ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
         if (ret)
-               unregister_sysctl_table(sysctl_header);
+               unregister_pernet_subsys(&ip_vs_lblcr_ops);
         return ret;
  }
  
-
  static void __exit ip_vs_lblcr_cleanup(void)
  {
-       unregister_sysctl_table(sysctl_header);
         unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+       unregister_pernet_subsys(&ip_vs_lblcr_ops);
  }
  
  
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c

index 4680647..f454c80 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
         struct nf_conntrack_tuple *orig, new_reply;
         struct ip_vs_conn *cp;
         struct ip_vs_conn_param p;
+       struct net *net = nf_ct_net(ct);
  
         if (exp->tuple.src.l3num != PF_INET)
                 return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
  
         /* RS->CLIENT */
         orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-       ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+       ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
                               &orig->src.u3, orig->src.u.tcp.port,
                               &orig->dst.u3, orig->dst.u.tcp.port, &p);
         cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
                 " for conn " FMT_CONN "\n",
                 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
  
-       h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+       h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+                                 &tuple);
         if (h) {
                 ct = nf_ct_tuplehash_to_ctrack(h);
                 /* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c

index 3414af7..5cf859c 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
  }
  
  /* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
  {
         struct ip_vs_pe *pe;
  
-       IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+       IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
                   pe_name);
  
         spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
  }
  
  /* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
  {
         struct ip_vs_pe *pe;
  
         /* Search for the pe by name */
-       pe = ip_vs_pe_getbyname(name);
+       pe = __ip_vs_pe_getbyname(name);
  
         /* If pe not found, load the module and search again */
         if (!pe) {
                 request_module("ip_vs_pe_%s", name);
-               pe = ip_vs_pe_getbyname(name);
+               pe = __ip_vs_pe_getbyname(name);
         }
  
         return pe;
  }
  
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
-       if (pe && pe->module)
-               module_put(pe->module);
-}
-
  /* Register a pe in the pe list */
  int register_ip_vs_pe(struct ip_vs_pe *pe)
  {
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c

index b8b4e96..0d83bc0 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
         struct ip_vs_iphdr iph;
         unsigned int dataoff, datalen, matchoff, matchlen;
         const char *dptr;
+       int retc;
  
         ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
  
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
         if (dataoff >= skb->len)
                 return -EINVAL;
  
+       if ((retc=skb_linearize(skb)) < 0)
+               return retc;
         dptr = skb->data + dataoff;
         datalen = skb->len - dataoff;
  
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c

index c539983..6ac986c 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
         return 0;
  }
  
+/*
+ *     Allocate and link the per-netns data of protocol @pp; 0 or -ENOMEM.
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+       struct ip_vs_proto_data *pd =   /* pernet init may sleep: GFP_KERNEL */
+                       kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
+
+       if (!pd) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       pd->pp = pp;    /* For speed issues */
+       pd->next = ipvs->proto_data_table[hash];
+       ipvs->proto_data_table[hash] = pd;
+       atomic_set(&pd->appcnt, 0);     /* Init app counter */
+
+       if (pp->init_netns != NULL)     /* optional per-protocol netns setup */
+               pp->init_netns(net, pd);
+
+       return 0;
+}
  
  /*
   *     unregister an ipvs protocol
@@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
         return -ESRCH;
  }
  
+/*
+ *     Unlink @pd from @net's table, run exit_netns, free it; else -ESRCH.
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data **pd_p;
+       unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+       pd_p = &ipvs->proto_data_table[hash];
+       for (; *pd_p; pd_p = &(*pd_p)->next) {
+               if (*pd_p == pd) {
+                       *pd_p = pd->next;
+                       if (pd->pp->exit_netns != NULL)
+                               pd->pp->exit_netns(net, pd);
+                       kfree(pd);
+                       return 0;
+               }
+       }
+
+       return -ESRCH;
+}
  
  /*
   *     get ip_vs_protocol object by its proto.
@@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
  }
  EXPORT_SYMBOL(ip_vs_proto_get);
  
+/*
+ *     Find the per-netns data of protocol @proto in @ipvs; NULL if none.
+ */
+struct ip_vs_proto_data *
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+       struct ip_vs_proto_data *pd;
+       unsigned hash = IP_VS_PROTO_HASH(proto);
+
+       for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+               if (pd->pp->protocol == proto)
+                       return pd;
+       }
+
+       return NULL;
+}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       return __ipvs_proto_data_get(ipvs, proto);
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
  
  /*
   *     Propagate event for state change to all protocols
   */
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
  {
-       struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
         int i;
  
         for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-               for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
-                       if (pp->timeout_change)
-                               pp->timeout_change(pp, flags);
+               for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+                       if (pd->pp->timeout_change)
+                               pd->pp->timeout_change(pd, flags);
                 }
         }
  }
@@ -236,6 +309,67 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
                 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
  }
  
+/*
+ * Per network name-space init: register the per-netns data of every
+ * built-in protocol.  If one registration fails, undo those already done
+ * and return the error instead of leaving the netns half-initialised.
+ */
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+       static struct ip_vs_protocol *protos[] = {
+#ifdef CONFIG_IP_VS_PROTO_TCP
+               &ip_vs_protocol_tcp,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+               &ip_vs_protocol_udp,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+               &ip_vs_protocol_sctp,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+               &ip_vs_protocol_ah,
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+               &ip_vs_protocol_esp,
+#endif
+       };
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd;
+       int i, ret;
+
+       for (i = 0; i < ARRAY_SIZE(protos); i++) {
+               ret = register_ip_vs_proto_netns(net, protos[i]);
+               if (ret < 0)
+                       goto cleanup;
+       }
+       return 0;
+
+cleanup:
+       /* drop the proto data registered so far for this netns */
+       for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+               while ((pd = ipvs->proto_data_table[i]) != NULL)
+                       unregister_ip_vs_proto_netns(net, pd);
+       }
+       return ret;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd;
+       int i;
+
+       /* unregister all the ipvs proto data for this netns */
+       for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+               while ((pd = ipvs->proto_data_table[i]) != NULL)
+                       unregister_ip_vs_proto_netns(net, pd);
+       }
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+       .init = __ip_vs_protocol_init,
+       .exit = __ip_vs_protocol_cleanup,
+};
  
  int __init ip_vs_protocol_init(void)
  {
@@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void)
         REGISTER_PROTOCOL(&ip_vs_protocol_esp);
  #endif
         pr_info("Registered protocols (%s)\n", &protocols[2]);
+       return register_pernet_subsys(&ipvs_proto_ops);
  
         return 0;
  }
@@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void)
         struct ip_vs_protocol *pp;
         int i;
  
+       unregister_pernet_subsys(&ipvs_proto_ops);
         /* unregister all the ipvs protocols */
         for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
                 while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c

index 3a04611..5b8eb8b 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,28 +41,30 @@ struct isakmp_hdr {
  #define PORT_ISAKMP    500
  
  static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
-                            int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+                            const struct ip_vs_iphdr *iph, int inverse,
+                            struct ip_vs_conn_param *p)
  {
         if (likely(!inverse))
-               ip_vs_conn_fill_param(af, IPPROTO_UDP,
+               ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
                                       &iph->saddr, htons(PORT_ISAKMP),
                                       &iph->daddr, htons(PORT_ISAKMP), p);
         else
-               ip_vs_conn_fill_param(af, IPPROTO_UDP,
+               ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
                                       &iph->daddr, htons(PORT_ISAKMP),
                                       &iph->saddr, htons(PORT_ISAKMP), p);
  }
  
  static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
                    const struct ip_vs_iphdr *iph, unsigned int proto_off,
                    int inverse)
  {
         struct ip_vs_conn *cp;
         struct ip_vs_conn_param p;
+       struct net *net = skb_net(skb);
  
-       ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+       ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
         cp = ip_vs_conn_in_get(&p);
         if (!cp) {
                 /*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
                 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
                               "%s%s %s->%s\n",
                               inverse ? "ICMP+" : "",
-                             pp->name,
+                             ip_vs_proto_get(iph->protocol)->name,
                               IP_VS_DBG_ADDR(af, &iph->saddr),
                               IP_VS_DBG_ADDR(af, &iph->daddr));
         }
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
  
  static struct ip_vs_conn *
  ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-                   struct ip_vs_protocol *pp,
                     const struct ip_vs_iphdr *iph,
                     unsigned int proto_off,
                     int inverse)
  {
         struct ip_vs_conn *cp;
         struct ip_vs_conn_param p;
+       struct net *net = skb_net(skb);
  
-       ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+       ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
         cp = ip_vs_conn_out_get(&p);
         if (!cp) {
                 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
                               "%s%s %s->%s\n",
                               inverse ? "ICMP+" : "",
-                             pp->name,
+                             ip_vs_proto_get(iph->protocol)->name,
                               IP_VS_DBG_ADDR(af, &iph->saddr),
                               IP_VS_DBG_ADDR(af, &iph->daddr));
         }
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
  
  
  static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                      int *verdict, struct ip_vs_conn **cpp)
  {
         /*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
         return 0;
  }
  
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-       /* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-       /* nothing to do now */
-}
-
-
  #ifdef CONFIG_IP_VS_PROTO_AH
  struct ip_vs_protocol ip_vs_protocol_ah = {
         .name =                 "AH",
         .protocol =             IPPROTO_AH,
         .num_states =           1,
         .dont_defrag =          1,
-       .init =                 ah_esp_init,
-       .exit =                 ah_esp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
         .conn_schedule =        ah_esp_conn_schedule,
         .conn_in_get =          ah_esp_conn_in_get,
         .conn_out_get =         ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
         .app_conn_bind =        NULL,
         .debug_packet =         ip_vs_tcpudp_debug_packet,
         .timeout_change =       NULL,           /* ISAKMP */
-       .set_state_timeout =    NULL,
  };
  #endif
  
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
         .protocol =             IPPROTO_ESP,
         .num_states =           1,
         .dont_defrag =          1,
-       .init =                 ah_esp_init,
-       .exit =                 ah_esp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
         .conn_schedule =        ah_esp_conn_schedule,
         .conn_in_get =          ah_esp_conn_in_get,
         .conn_out_get =         ah_esp_conn_out_get,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c

index 1ea96bc..fb2d04a 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,9 +9,10 @@
  #include <net/ip_vs.h>
  
  static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                    int *verdict, struct ip_vs_conn **cpp)
  {
+       struct net *net;
         struct ip_vs_service *svc;
         sctp_chunkhdr_t _schunkh, *sch;
         sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                                  sizeof(_schunkh), &_schunkh);
         if (sch == NULL)
                 return 0;
-
+       net = skb_net(skb);
         if ((sch->type == SCTP_CID_INIT) &&
-           (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+           (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
                                      &iph.daddr, sh->dest))) {
                 int ignored;
  
-               if (ip_vs_todrop()) {
+               if (ip_vs_todrop(net_ipvs(net))) {
                         /*
                          * It seems that we are very loaded.
                          * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                  * Let the virtual server select a real server for the
                  * incoming connection, and create a connection entry.
                  */
-               *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pd);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                         return 0;
                 }
                 ip_vs_service_put(svc);
         }
-
+       /* NF_ACCEPT */
         return 1;
  }
  
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
  /*
   *      Timeout table[state]
   */
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
         [IP_VS_SCTP_S_NONE]         =     2 * HZ,
         [IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
         [IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
         return "?";
  }
  
-static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
-{
-}
-
-static int
-sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-
-return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
-                               sctp_state_name_table, sname, to);
-}
-
  static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                 int direction, const struct sk_buff *skb)
  {
         sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
  
                 IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
                                 "%s:%d state: %s->%s conn->refcnt:%d\n",
-                               pp->name,
+                               pd->pp->name,
                                 ((direction == IP_VS_DIR_OUTPUT) ?
                                  "output " : "input "),
                                 IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
                         }
                 }
         }
+       if (likely(pd))
+               cp->timeout = pd->timeout_table[cp->state = next_state];
+       else    /* What to do ? */
+               cp->timeout = sctp_timeouts[cp->state = next_state];
  
-        cp->timeout = pp->timeout_table[cp->state = next_state];
-
-        return 1;
+       return 1;
  }
  
  static int
  sctp_state_transition(struct ip_vs_conn *cp, int direction,
-               const struct sk_buff *skb, struct ip_vs_protocol *pp)
+               const struct sk_buff *skb, struct ip_vs_proto_data *pd)
  {
         int ret = 0;
  
         spin_lock(&cp->lock);
-       ret = set_sctp_state(pp, cp, direction, skb);
+       ret = set_sctp_state(pd, cp, direction, skb);
         spin_unlock(&cp->lock);
  
         return ret;
  }
  
-/*
- *      Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS        4
-#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
  static inline __u16 sctp_app_hashkey(__be16 port)
  {
         return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
                 & SCTP_APP_TAB_MASK;
  }
  
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
  {
         struct ip_vs_app *i;
         __u16 hash;
         __be16 port = inc->port;
         int ret = 0;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
  
         hash = sctp_app_hashkey(port);
  
-       spin_lock_bh(&sctp_app_lock);
-       list_for_each_entry(i, &sctp_apps[hash], p_list) {
+       spin_lock_bh(&ipvs->sctp_app_lock);
+       list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
                 if (i->port == port) {
                         ret = -EEXIST;
                         goto out;
                 }
         }
-       list_add(&inc->p_list, &sctp_apps[hash]);
-       atomic_inc(&ip_vs_protocol_sctp.appcnt);
+       list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+       atomic_inc(&pd->appcnt);
  out:
-       spin_unlock_bh(&sctp_app_lock);
+       spin_unlock_bh(&ipvs->sctp_app_lock);
  
         return ret;
  }
  
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
  {
-       spin_lock_bh(&sctp_app_lock);
-       atomic_dec(&ip_vs_protocol_sctp.appcnt);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+       spin_lock_bh(&ipvs->sctp_app_lock);
+       atomic_dec(&pd->appcnt);
         list_del(&inc->p_list);
-       spin_unlock_bh(&sctp_app_lock);
+       spin_unlock_bh(&ipvs->sctp_app_lock);
  }
  
  static int sctp_app_conn_bind(struct ip_vs_conn *cp)
  {
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
         int hash;
         struct ip_vs_app *inc;
         int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
         /* Lookup application incarnations and bind the right one */
         hash = sctp_app_hashkey(cp->vport);
  
-       spin_lock(&sctp_app_lock);
-       list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+       spin_lock(&ipvs->sctp_app_lock);
+       list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
                 if (inc->port == cp->vport) {
                         if (unlikely(!ip_vs_app_inc_get(inc)))
                                 break;
-                       spin_unlock(&sctp_app_lock);
+                       spin_unlock(&ipvs->sctp_app_lock);
  
                         IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
                                         "%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
                         goto out;
                 }
         }
-       spin_unlock(&sctp_app_lock);
+       spin_unlock(&ipvs->sctp_app_lock);
  out:
         return result;
  }
  
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/*
+ * Per-netns SCTP init: set up this netns' SCTP app hash table and the lock
+ * guarding it, and build its timeout table from the sctp_timeouts defaults.
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
  {
-       IP_VS_INIT_HASH_TABLE(sctp_apps);
-       pp->timeout_table = sctp_timeouts;
-}
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
+       ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+       spin_lock_init(&ipvs->sctp_app_lock);
+       pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+                                                       sizeof(sctp_timeouts));
+}
  
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
  {
-
+       kfree(pd->timeout_table);
  }
  
  struct ip_vs_protocol ip_vs_protocol_sctp = {
-       .name = "SCTP",
-       .protocol = IPPROTO_SCTP,
-       .num_states = IP_VS_SCTP_S_LAST,
-       .dont_defrag = 0,
-       .appcnt = ATOMIC_INIT(0),
-       .init = ip_vs_sctp_init,
-       .exit = ip_vs_sctp_exit,
-       .register_app = sctp_register_app,
+       .name           = "SCTP",
+       .protocol       = IPPROTO_SCTP,
+       .num_states     = IP_VS_SCTP_S_LAST,
+       .dont_defrag    = 0,
+       .init           = NULL,
+       .exit           = NULL,
+       .init_netns     = __ip_vs_sctp_init,
+       .exit_netns     = __ip_vs_sctp_exit,
+       .register_app   = sctp_register_app,
         .unregister_app = sctp_unregister_app,
-       .conn_schedule = sctp_conn_schedule,
-       .conn_in_get = ip_vs_conn_in_get_proto,
-       .conn_out_get = ip_vs_conn_out_get_proto,
-       .snat_handler = sctp_snat_handler,
-       .dnat_handler = sctp_dnat_handler,
-       .csum_check = sctp_csum_check,
-       .state_name = sctp_state_name,
+       .conn_schedule  = sctp_conn_schedule,
+       .conn_in_get    = ip_vs_conn_in_get_proto,
+       .conn_out_get   = ip_vs_conn_out_get_proto,
+       .snat_handler   = sctp_snat_handler,
+       .dnat_handler   = sctp_dnat_handler,
+       .csum_check     = sctp_csum_check,
+       .state_name     = sctp_state_name,
         .state_transition = sctp_state_transition,
-       .app_conn_bind = sctp_app_conn_bind,
-       .debug_packet = ip_vs_tcpudp_debug_packet,
-       .timeout_change = sctp_timeout_change,
-       .set_state_timeout = sctp_set_state_timeout,
+       .app_conn_bind  = sctp_app_conn_bind,
+       .debug_packet   = ip_vs_tcpudp_debug_packet,
+       .timeout_change = NULL,
  };
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c

index f6c5200..c0cc341 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
   *              as published by the Free Software Foundation; either version
   *              2 of the License, or (at your option) any later version.
   *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
   *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              tcp_timeouts table has copy per netns in a hash table per
+ *              protocol ip_vs_proto_data and is handled by netns
   */
  
  #define KMSG_COMPONENT "IPVS"
@@ -28,9 +32,10 @@
  #include <net/ip_vs.h>
  
  static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                   int *verdict, struct ip_vs_conn **cpp)
  {
+       struct net *net;
         struct ip_vs_service *svc;
         struct tcphdr _tcph, *th;
         struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 *verdict = NF_DROP;
                 return 0;
         }
-
+       net = skb_net(skb);
         /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
         if (th->syn &&
-           (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-                                    th->dest))) {
+           (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+                                    &iph.daddr, th->dest))) {
                 int ignored;
  
-               if (ip_vs_todrop()) {
+               if (ip_vs_todrop(net_ipvs(net))) {
                         /*
                          * It seems that we are very loaded.
                          * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                  * Let the virtual server select a real server for the
                  * incoming connection, and create a connection entry.
                  */
-               *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pd);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                         return 0;
                 }
                 ip_vs_service_put(svc);
         }
+       /* NF_ACCEPT */
         return 1;
  }
  
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
  /*
   *     Timeout table[state]
   */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
         [IP_VS_TCP_S_NONE]              =       2*HZ,
         [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
         [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
  /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
  };
  
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
  {
         int on = (flags & 1);           /* secure_tcp */
  
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
         ** for most if not for all of the applications. Something
         ** like "capabilities" (flags) for each object.
         */
-       tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-       return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
-                                      tcp_state_name_table, sname, to);
+       pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
  }
  
  static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
  }
  
  static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
               int direction, struct tcphdr *th)
  {
         int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
                 goto tcp_state_out;
         }
  
-       new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+       new_state =
+               pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
  
    tcp_state_out:
         if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
  
                 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
                               "%s:%d state: %s->%s conn->refcnt:%d\n",
-                             pp->name,
+                             pd->pp->name,
                               ((state_off == TCP_DIR_OUTPUT) ?
                                "output " : "input "),
                               th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
                 }
         }
  
-       cp->timeout = pp->timeout_table[cp->state = new_state];
+       if (likely(pd))
+               cp->timeout = pd->timeout_table[cp->state = new_state];
+       else    /* What to do ? */
+               cp->timeout = tcp_timeouts[cp->state = new_state];
  }
  
-
  /*
   *     Handle state transitions
   */
  static int
  tcp_state_transition(struct ip_vs_conn *cp, int direction,
                      const struct sk_buff *skb,
-                    struct ip_vs_protocol *pp)
+                    struct ip_vs_proto_data *pd)
  {
         struct tcphdr _tcph, *th;
  
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
                 return 0;
  
         spin_lock(&cp->lock);
-       set_tcp_state(pp, cp, direction, th);
+       set_tcp_state(pd, cp, direction, th);
         spin_unlock(&cp->lock);
  
         return 1;
  }
  
-
-/*
- *     Hash table for TCP application incarnations
- */
-#define        TCP_APP_TAB_BITS        4
-#define        TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
-#define        TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
  static inline __u16 tcp_app_hashkey(__be16 port)
  {
         return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
  }
  
  
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
  {
         struct ip_vs_app *i;
         __u16 hash;
         __be16 port = inc->port;
         int ret = 0;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
  
         hash = tcp_app_hashkey(port);
  
-       spin_lock_bh(&tcp_app_lock);
-       list_for_each_entry(i, &tcp_apps[hash], p_list) {
+       spin_lock_bh(&ipvs->tcp_app_lock);
+       list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
                 if (i->port == port) {
                         ret = -EEXIST;
                         goto out;
                 }
         }
-       list_add(&inc->p_list, &tcp_apps[hash]);
-       atomic_inc(&ip_vs_protocol_tcp.appcnt);
+       list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+       atomic_inc(&pd->appcnt);
  
    out:
-       spin_unlock_bh(&tcp_app_lock);
+       spin_unlock_bh(&ipvs->tcp_app_lock);
         return ret;
  }
  
  
  static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
  {
-       spin_lock_bh(&tcp_app_lock);
-       atomic_dec(&ip_vs_protocol_tcp.appcnt);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+       spin_lock_bh(&ipvs->tcp_app_lock);
+       atomic_dec(&pd->appcnt);
         list_del(&inc->p_list);
-       spin_unlock_bh(&tcp_app_lock);
+       spin_unlock_bh(&ipvs->tcp_app_lock);
  }
  
  
  static int
  tcp_app_conn_bind(struct ip_vs_conn *cp)
  {
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
         int hash;
         struct ip_vs_app *inc;
         int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
         /* Lookup application incarnations and bind the right one */
         hash = tcp_app_hashkey(cp->vport);
  
-       spin_lock(&tcp_app_lock);
-       list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+       spin_lock(&ipvs->tcp_app_lock);
+       list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
                 if (inc->port == cp->vport) {
                         if (unlikely(!ip_vs_app_inc_get(inc)))
                                 break;
-                       spin_unlock(&tcp_app_lock);
+                       spin_unlock(&ipvs->tcp_app_lock);
  
                         IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
                                       "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
                         goto out;
                 }
         }
-       spin_unlock(&tcp_app_lock);
+       spin_unlock(&ipvs->tcp_app_lock);
  
    out:
         return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
  /*
   *     Set LISTEN timeout. (ip_vs_conn_put will setup timer)
   */
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
  {
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
         spin_lock(&cp->lock);
         cp->state = IP_VS_TCP_S_LISTEN;
-       cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+       cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+                          : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
         spin_unlock(&cp->lock);
  }
  
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   Timeouts are per network namespace (netns) now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
  {
-       IP_VS_INIT_HASH_TABLE(tcp_apps);
-       pp->timeout_table = tcp_timeouts;
-}
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
+       ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+       spin_lock_init(&ipvs->tcp_app_lock);
+       pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+                                                       sizeof(tcp_timeouts));
+       pd->tcp_state_table =  tcp_states;
+}
  
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
  {
+       kfree(pd->timeout_table);
  }
  
  
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
         .protocol =             IPPROTO_TCP,
         .num_states =           IP_VS_TCP_S_LAST,
         .dont_defrag =          0,
-       .appcnt =               ATOMIC_INIT(0),
-       .init =                 ip_vs_tcp_init,
-       .exit =                 ip_vs_tcp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
+       .init_netns =           __ip_vs_tcp_init,
+       .exit_netns =           __ip_vs_tcp_exit,
         .register_app =         tcp_register_app,
         .unregister_app =       tcp_unregister_app,
         .conn_schedule =        tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
         .app_conn_bind =        tcp_app_conn_bind,
         .debug_packet =         ip_vs_tcpudp_debug_packet,
         .timeout_change =       tcp_timeout_change,
-       .set_state_timeout =    tcp_set_state_timeout,
  };
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c

index 9d106a0..f1282cb 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
   *              as published by the Free Software Foundation; either version
   *              2 of the License, or (at your option) any later version.
   *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
   *
   */
  
@@ -28,9 +29,10 @@
  #include <net/ip6_checksum.h>
  
  static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                   int *verdict, struct ip_vs_conn **cpp)
  {
+       struct net *net;
         struct ip_vs_service *svc;
         struct udphdr _udph, *uh;
         struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 *verdict = NF_DROP;
                 return 0;
         }
-
-       svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+       net = skb_net(skb);
+       svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
                                 &iph.daddr, uh->dest);
         if (svc) {
                 int ignored;
  
-               if (ip_vs_todrop()) {
+               if (ip_vs_todrop(net_ipvs(net))) {
                         /*
                          * It seems that we are very loaded.
                          * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                  * Let the virtual server select a real server for the
                  * incoming connection, and create a connection entry.
                  */
-               *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pd);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                         return 0;
                 }
                 ip_vs_service_put(svc);
         }
+       /* NF_ACCEPT */
         return 1;
  }
  
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
         return 1;
  }
  
-
-/*
- *     Note: the caller guarantees that only one of register_app,
- *     unregister_app or app_conn_bind is called each time.
- */
-
-#define        UDP_APP_TAB_BITS        4
-#define        UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
-#define        UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
  static inline __u16 udp_app_hashkey(__be16 port)
  {
         return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
  }
  
  
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
  {
         struct ip_vs_app *i;
         __u16 hash;
         __be16 port = inc->port;
         int ret = 0;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
  
         hash = udp_app_hashkey(port);
  
  
-       spin_lock_bh(&udp_app_lock);
-       list_for_each_entry(i, &udp_apps[hash], p_list) {
+       spin_lock_bh(&ipvs->udp_app_lock);
+       list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
                 if (i->port == port) {
                         ret = -EEXIST;
                         goto out;
                 }
         }
-       list_add(&inc->p_list, &udp_apps[hash]);
-       atomic_inc(&ip_vs_protocol_udp.appcnt);
+       list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+       atomic_inc(&pd->appcnt);
  
    out:
-       spin_unlock_bh(&udp_app_lock);
+       spin_unlock_bh(&ipvs->udp_app_lock);
         return ret;
  }
  
  
  static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
  {
-       spin_lock_bh(&udp_app_lock);
-       atomic_dec(&ip_vs_protocol_udp.appcnt);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       spin_lock_bh(&ipvs->udp_app_lock);
+       atomic_dec(&pd->appcnt);
         list_del(&inc->p_list);
-       spin_unlock_bh(&udp_app_lock);
+       spin_unlock_bh(&ipvs->udp_app_lock);
  }
  
  
  static int udp_app_conn_bind(struct ip_vs_conn *cp)
  {
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
         int hash;
         struct ip_vs_app *inc;
         int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
         /* Lookup application incarnations and bind the right one */
         hash = udp_app_hashkey(cp->vport);
  
-       spin_lock(&udp_app_lock);
-       list_for_each_entry(inc, &udp_apps[hash], p_list) {
+       spin_lock(&ipvs->udp_app_lock);
+       list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
                 if (inc->port == cp->vport) {
                         if (unlikely(!ip_vs_app_inc_get(inc)))
                                 break;
-                       spin_unlock(&udp_app_lock);
+                       spin_unlock(&ipvs->udp_app_lock);
  
                         IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
                                       "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
                         goto out;
                 }
         }
-       spin_unlock(&udp_app_lock);
+       spin_unlock(&ipvs->udp_app_lock);
  
    out:
         return result;
  }
  
  
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
         [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
         [IP_VS_UDP_S_LAST]              =       2*HZ,
  };
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
         [IP_VS_UDP_S_LAST]              =       "BUG!",
  };
  
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-       return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
-                                      udp_state_name_table, sname, to);
-}
-
  static const char * udp_state_name(int state)
  {
         if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
  static int
  udp_state_transition(struct ip_vs_conn *cp, int direction,
                      const struct sk_buff *skb,
-                    struct ip_vs_protocol *pp)
+                    struct ip_vs_proto_data *pd)
  {
-       cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+       if (unlikely(!pd)) {
+               pr_err("UDP no ns data\n");
+               return 0;
+       }
+
+       cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
         return 1;
  }
  
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
  {
-       IP_VS_INIT_HASH_TABLE(udp_apps);
-       pp->timeout_table = udp_timeouts;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+       spin_lock_init(&ipvs->udp_app_lock);
+       pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+                                                       sizeof(udp_timeouts));
  }
  
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
  {
+       kfree(pd->timeout_table);
  }
  
  
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
         .protocol =             IPPROTO_UDP,
         .num_states =           IP_VS_UDP_S_LAST,
         .dont_defrag =          0,
-       .init =                 udp_init,
-       .exit =                 udp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
+       .init_netns =           __udp_init,
+       .exit_netns =           __udp_exit,
         .conn_schedule =        udp_conn_schedule,
         .conn_in_get =          ip_vs_conn_in_get_proto,
         .conn_out_get =         ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
         .app_conn_bind =        udp_app_conn_bind,
         .debug_packet =         ip_vs_tcpudp_debug_packet,
         .timeout_change =       NULL,
-       .set_state_timeout =    udp_set_state_timeout,
  };
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c

index ab85aed..d1adf98 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
   *              high-performance and highly available server based on a
   *              cluster of servers.
   *
+ * Version 1:   Capable of handling both version 0 and 1 messages.
+ *              Version 0 is the plain old format.
+ *              Note: Version 0 receivers will just drop Version 1 messages.
+ *              Version 1 is capable of handling IPv6, persistence data,
+ *              time-outs, and firewall marks.
+ *              In ver. 1 "ip_vs_sync_conn_options" is sent in network order.
+ *              Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions  Message: is a complete datagram
+ *              Sync_conn: is a part of a Message
+ *              Param Data is an option to a Sync_conn.
+ *
   * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   *
   * ip_vs_sync:  sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
   *     Alexandre Cassen        :       Added SyncID support for incoming sync
   *                                     messages filtering.
   *     Justin Ossevoort        :       Fix endian problem on sync message size.
+ *     Hans Schillstrom        :       Added Version 1: i.e. IPv6,
+ *                                     Persistence support, fwmark and time-out.
   */
  
  #define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
  #include <linux/wait.h>
  #include <linux/kernel.h>
  
+#include <asm/unaligned.h>             /* Used for ntoh_seq and hton_seq */
+
  #include <net/ip.h>
  #include <net/sock.h>
  
@@ -43,11 +59,13 @@
  #define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
  #define IP_VS_SYNC_PORT  8848          /* multicast port */
  
+#define SYNC_PROTO_VER  1              /* Protocol version in header */
  
  /*
   *     IPVS sync connection entry
+ *     Version 0, i.e. original version.
   */
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
         __u8                    reserved;
  
         /* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
         struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
  };
  
+/*
+     Sync Connection format (sync_conn)
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |    Type       |    Protocol   | Ver.  |        Size           |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             Flags                             |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            State              |         cport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            vport              |         dport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             fwmark                            |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             timeout  (in sec.)                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                              ...                              |
+      |                        IP-Addresses  (v4 or v6)               |
+      |                              ...                              |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  Optional Parameters.
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      | Param. Type    | Param. Length |   Param. data                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
+      |                              ...                              |
+      |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                               | Param Type    | Param. Length |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                           Param  data                         |
+      |         Last Param data should be padded for 32 bit alignment |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ *  Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+       __u8                    type;
+       __u8                    protocol;       /* Which protocol (TCP/UDP) */
+       __be16                  ver_size;       /* Version msb 4 bits */
+       /* Flags and state transition */
+       __be32                  flags;          /* status flags */
+       __be16                  state;          /* state info   */
+       /* Protocol, addresses and port numbers */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __be32                  fwmark;         /* Firewall mark from skb */
+       __be32                  timeout;        /* cp timeout */
+       __be32                  caddr;          /* client address */
+       __be32                  vaddr;          /* virtual address */
+       __be32                  daddr;          /* destination address */
+       /* The sequence options start here */
+       /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+       __u8                    type;
+       __u8                    protocol;       /* Which protocol (TCP/UDP) */
+       __be16                  ver_size;       /* Version msb 4 bits */
+       /* Flags and state transition */
+       __be32                  flags;          /* status flags */
+       __be16                  state;          /* state info   */
+       /* Protocol, addresses and port numbers */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __be32                  fwmark;         /* Firewall mark from skb */
+       __be32                  timeout;        /* cp timeout */
+       struct in6_addr         caddr;          /* client address */
+       struct in6_addr         vaddr;          /* virtual address */
+       struct in6_addr         daddr;          /* destination address */
+       /* The sequence options start here */
+       /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+       struct ip_vs_sync_v4    v4;
+       struct ip_vs_sync_v6    v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6            0
+#define STYPE_F_INET6          (1 << STYPE_INET6)
+
+#define SVER_SHIFT             12              /* Shift to get version */
+#define SVER_MASK              0x0fff          /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA      1
+#define IPVS_OPT_PE_DATA       2
+#define IPVS_OPT_PE_NAME       3
+#define IPVS_OPT_PARAM         7
+
+#define IPVS_OPT_F_SEQ_DATA    (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA     (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME     (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM       (1 << (IPVS_OPT_PARAM-1))
+
  struct ip_vs_sync_thread_data {
+       struct net *net;
         struct socket *sock;
         char *buf;
  };
  
-#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
  #define FULL_CONN_SIZE  \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
  
  
  /*
-  The master mulitcasts messages to the backup load balancers in the
-  following format.
+  The master multicasts messages (Datagrams) to the backup load balancers
+  in the following format.
+
+ Version 1:
+  Note, first byte should be Zero, so ver 0 receivers will drop the packet.
  
         0                   1                   2                   3
         0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |  Count Conns  |    SyncID     |            Size               |
+      |      0        |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    Version    |    Reserved, set to Zero      |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        |                                                               |
        |                    IPVS Sync Connection (1)                   |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        |                            .                                  |
-      |                            .                                  |
+      ~                            .                                  ~
        |                            .                                  |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
        |                                                               |
        |                    IPVS Sync Connection (n)                   |
        +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                    IPVS Sync Connection (1)                   |
  */
  
  #define SYNC_MESG_HEADER_LEN   4
  #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
  
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
         __u8                    nr_conns;
         __u8                    syncid;
         __u16                   size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
         /* ip_vs_sync_conn entries start here */
  };
  
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+       __u8                    reserved;       /* must be zero */
+       __u8                    syncid;
+       __u16                   size;
+       __u8                    nr_conns;
+       __s8                    version;        /* SYNC_PROTO_VER  */
+       __u16                   spare;
+       /* ip_vs_sync_conn entries start here */
+};
  
  struct ip_vs_sync_buff {
         struct list_head        list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
         unsigned char           *end;
  };
  
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
  /* multicast addr */
  static struct sockaddr_in mcast_addr = {
         .sin_family             = AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
         .sin_addr.s_addr        = cpu_to_be32(IP_VS_SYNC_GROUP),
  };
  
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+       ho->init_seq       = get_unaligned_be32(&no->init_seq);
+       ho->delta          = get_unaligned_be32(&no->delta);
+       ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+       put_unaligned_be32(ho->init_seq, &no->init_seq);
+       put_unaligned_be32(ho->delta, &no->delta);
+       put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
  
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
  {
         struct ip_vs_sync_buff *sb;
  
-       spin_lock_bh(&ip_vs_sync_lock);
-       if (list_empty(&ip_vs_sync_queue)) {
+       spin_lock_bh(&ipvs->sync_lock);
+       if (list_empty(&ipvs->sync_queue)) {
                 sb = NULL;
         } else {
-               sb = list_entry(ip_vs_sync_queue.next,
+               sb = list_entry(ipvs->sync_queue.next,
                                 struct ip_vs_sync_buff,
                                 list);
                 list_del(&sb->list);
         }
-       spin_unlock_bh(&ip_vs_sync_lock);
+       spin_unlock_bh(&ipvs->sync_lock);
  
         return sb;
  }
  
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
  {
         struct ip_vs_sync_buff *sb;
  
         if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
                 return NULL;
  
-       if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+       sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+       if (!sb->mesg) {
                 kfree(sb);
                 return NULL;
         }
+       sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zeo now */
+       sb->mesg->version = SYNC_PROTO_VER;
+       sb->mesg->syncid = ipvs->master_syncid;
+       sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
         sb->mesg->nr_conns = 0;
-       sb->mesg->syncid = ip_vs_master_syncid;
-       sb->mesg->size = 4;
-       sb->head = (unsigned char *)sb->mesg + 4;
-       sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+       sb->mesg->spare = 0;
+       sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
+       sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+
         sb->firstuse = jiffies;
         return sb;
  }
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
         kfree(sb);
  }
  
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
  {
-       spin_lock(&ip_vs_sync_lock);
-       if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-               list_add_tail(&sb->list, &ip_vs_sync_queue);
+       struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+       spin_lock(&ipvs->sync_lock);
+       if (ipvs->sync_state & IP_VS_STATE_MASTER)
+               list_add_tail(&sb->list, &ipvs->sync_queue);
         else
                 ip_vs_sync_buff_release(sb);
-       spin_unlock(&ip_vs_sync_lock);
+       spin_unlock(&ipvs->sync_lock);
  }
  
  /*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
   *     than the specified time or the specified time is zero.
   */
  static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
  {
         struct ip_vs_sync_buff *sb;
  
-       spin_lock_bh(&curr_sb_lock);
-       if (curr_sb && (time == 0 ||
-                       time_before(jiffies - curr_sb->firstuse, time))) {
-               sb = curr_sb;
-               curr_sb = NULL;
+       spin_lock_bh(&ipvs->sync_buff_lock);
+       if (ipvs->sync_buff && (time == 0 ||
+           time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+               sb = ipvs->sync_buff;
+               ipvs->sync_buff = NULL;
         } else
                 sb = NULL;
-       spin_unlock_bh(&curr_sb_lock);
+       spin_unlock_bh(&ipvs->sync_buff_lock);
         return sb;
  }
  
+/*
+ * Switch between sending version 0 and version 1 sync messages (master only).
+ * A pending header-only sync_buff is freed, else queued or released.
+ */
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
+               return;
+       if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+               return;
+
+       spin_lock_bh(&ipvs->sync_buff_lock);
+       /* Buffer holds only the header? Then let buf_create do the job */
+       if (ipvs->sync_buff->mesg->size <=  sizeof(struct ip_vs_sync_mesg)) {
+               kfree(ipvs->sync_buff);
+               ipvs->sync_buff = NULL;
+       } else {
+               spin_lock_bh(&ipvs->sync_lock);
+               if (ipvs->sync_state & IP_VS_STATE_MASTER)
+                       list_add_tail(&ipvs->sync_buff->list,
+                                     &ipvs->sync_queue);
+               else
+                       ip_vs_sync_buff_release(ipvs->sync_buff);
+               spin_unlock_bh(&ipvs->sync_lock);
+       }
+       spin_unlock_bh(&ipvs->sync_buff_lock);
+}
  
  /*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+{
+       struct ip_vs_sync_buff *sb;
+       struct ip_vs_sync_mesg_v0 *mesg;
+
+       if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+               return NULL;
+
+       sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+       if (!sb->mesg) {
+               kfree(sb);
+               return NULL;
+       }
+       mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+       mesg->nr_conns = 0;
+       mesg->syncid = ipvs->master_syncid;
+       mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+       sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+       sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+       sb->firstuse = jiffies;
+       return sb;
+}
+
+/*
+ *      Version 0, can be switched in by sysctl.
   *      Add an ip_vs_conn information into the current sync_buff.
- *      Called by ip_vs_in.
   */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
  {
-       struct ip_vs_sync_mesg *m;
-       struct ip_vs_sync_conn *s;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_sync_mesg_v0 *m;
+       struct ip_vs_sync_conn_v0 *s;
         int len;
  
-       spin_lock(&curr_sb_lock);
-       if (!curr_sb) {
-               if (!(curr_sb=ip_vs_sync_buff_create())) {
-                       spin_unlock(&curr_sb_lock);
+       if (unlikely(cp->af != AF_INET))
+               return;
+       /* Do not sync ONE PACKET */
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               return;
+
+       spin_lock(&ipvs->sync_buff_lock);
+       if (!ipvs->sync_buff) {
+               ipvs->sync_buff =
+                       ip_vs_sync_buff_create_v0(ipvs);
+               if (!ipvs->sync_buff) {
+                       spin_unlock(&ipvs->sync_buff_lock);
                         pr_err("ip_vs_sync_buff_create failed.\n");
                         return;
                 }
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
  
         len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
                 SIMPLE_CONN_SIZE;
-       m = curr_sb->mesg;
-       s = (struct ip_vs_sync_conn *)curr_sb->head;
+       m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+       s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
  
         /* copy members */
+       s->reserved = 0;
         s->protocol = cp->protocol;
         s->cport = cp->cport;
         s->vport = cp->vport;
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
  
         m->nr_conns++;
         m->size += len;
-       curr_sb->head += len;
+       ipvs->sync_buff->head += len;
  
         /* check if there is a space for next one */
-       if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-               sb_queue_tail(curr_sb);
-               curr_sb = NULL;
+       if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+               sb_queue_tail(ipvs);
+               ipvs->sync_buff = NULL;
         }
-       spin_unlock(&curr_sb_lock);
+       spin_unlock(&ipvs->sync_buff_lock);
  
         /* synchronize its controller if it has */
         if (cp->control)
-               ip_vs_sync_conn(cp->control);
+               ip_vs_sync_conn(net, cp->control);
+}
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ *      Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_sync_mesg *m;
+       union ip_vs_sync_conn *s;
+       __u8 *p;
+       unsigned int len, pe_name_len, pad;
+
+       /* Handle old version of the protocol */
+       if (ipvs->sysctl_sync_ver == 0) {
+               ip_vs_sync_conn_v0(net, cp);
+               return;
+       }
+       /* Do not sync ONE PACKET */
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               goto control;
+sloop:
+       /* Sanity checks */
+       pe_name_len = 0;
+       if (cp->pe_data_len) {
+               if (!cp->pe_data || !cp->dest) {
+                       IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+                       return;
+               }
+               pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+       }
+
+       spin_lock(&ipvs->sync_buff_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               len = sizeof(struct ip_vs_sync_v6);
+       else
+#endif
+               len = sizeof(struct ip_vs_sync_v4);
+
+       if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+               len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+       if (cp->pe_data_len)
+               len += cp->pe_data_len + 2;     /* + Param hdr field */
+       if (pe_name_len)
+               len += pe_name_len + 2;
+
+       /* check if there is a space for this one  */
+       pad = 0;
+       if (ipvs->sync_buff) {
+               pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+               if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+                       sb_queue_tail(ipvs);
+                       ipvs->sync_buff = NULL;
+                       pad = 0;
+               }
+       }
+
+       if (!ipvs->sync_buff) {
+               ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+               if (!ipvs->sync_buff) {
+                       spin_unlock(&ipvs->sync_buff_lock);
+                       pr_err("ip_vs_sync_buff_create failed.\n");
+                       return;
+               }
+       }
+
+       m = ipvs->sync_buff->mesg;
+       p = ipvs->sync_buff->head;
+       ipvs->sync_buff->head += pad + len;
+       m->size += pad + len;
+       /* Add any padding needed after the previous sync_conn */
+       while (pad--)
+               *(p++) = 0;
+
+       s = (union ip_vs_sync_conn *)p;
+
+       /* Set message type  & copy members */
+       s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+       s->v4.ver_size = htons(len & SVER_MASK);        /* Version 0 */
+       s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+       s->v4.state = htons(cp->state);
+       s->v4.protocol = cp->protocol;
+       s->v4.cport = cp->cport;
+       s->v4.vport = cp->vport;
+       s->v4.dport = cp->dport;
+       s->v4.fwmark = htonl(cp->fwmark);
+       s->v4.timeout = htonl(cp->timeout / HZ);
+       m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6) {
+               p += sizeof(struct ip_vs_sync_v6);
+               ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+               ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+               ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+       } else
+#endif
+       {
+               p += sizeof(struct ip_vs_sync_v4);      /* options ptr */
+               s->v4.caddr = cp->caddr.ip;
+               s->v4.vaddr = cp->vaddr.ip;
+               s->v4.daddr = cp->daddr.ip;
+       }
+       if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+               *(p++) = IPVS_OPT_SEQ_DATA;
+               *(p++) = sizeof(struct ip_vs_sync_conn_options);
+               hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+               p += sizeof(struct ip_vs_seq);
+               hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+               p += sizeof(struct ip_vs_seq);
+       }
+       /* Handle pe data */
+       if (cp->pe_data_len && cp->pe_data) {
+               *(p++) = IPVS_OPT_PE_DATA;
+               *(p++) = cp->pe_data_len;
+               memcpy(p, cp->pe_data, cp->pe_data_len);
+               p += cp->pe_data_len;
+               if (pe_name_len) {
+                       /* Add PE_NAME */
+                       *(p++) = IPVS_OPT_PE_NAME;
+                       *(p++) = pe_name_len;
+                       memcpy(p, cp->pe->name, pe_name_len);
+                       p += pe_name_len;
+               }
+       }
+
+       spin_unlock(&ipvs->sync_buff_lock);
+
+control:
+       /* synchronize its controller if it has */
+       cp = cp->control;
+       if (!cp)
+               return;
+       /*
+        * Reduce sync rate for templates
+        * i.e only increment in_pkts for Templates.
+        */
+       if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+               int pkts = atomic_add_return(1, &cp->in_pkts);
+
+               if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+                       return;
+       }
+       goto sloop;
  }
  
+/*
+ *  fill_param used by version 1
+ */
  static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
-                          const union nf_inet_addr *caddr, __be16 cport,
-                          const union nf_inet_addr *vaddr, __be16 vport,
-                          struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+                          struct ip_vs_conn_param *p,
+                          __u8 *pe_data, unsigned int pe_data_len,
+                          __u8 *pe_name, unsigned int pe_name_len)
  {
-       /* XXX: Need to take into account persistence engine */
-       ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+                                     (const union nf_inet_addr *)&sc->v6.caddr,
+                                     sc->v6.cport,
+                                     (const union nf_inet_addr *)&sc->v6.vaddr,
+                                     sc->v6.vport, p);
+       else
+#endif
+               ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+                                     (const union nf_inet_addr *)&sc->v4.caddr,
+                                     sc->v4.cport,
+                                     (const union nf_inet_addr *)&sc->v4.vaddr,
+                                     sc->v4.vport, p);
+       /* Handle pe data */
+       if (pe_data_len) {
+               if (pe_name_len) {
+                       char buff[IP_VS_PENAME_MAXLEN+1];
+
+                       memcpy(buff, pe_name, pe_name_len);
+                       buff[pe_name_len]=0;
+                       p->pe = __ip_vs_pe_getbyname(buff);
+                       if (!p->pe) {
+                               IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+                                            buff);
+                               return 1;
+                       }
+               } else {
+                       IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+                       return 1;
+               }
+
+               p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+               if (!p->pe_data) {
+                       if (p->pe->module)
+                               module_put(p->pe->module);
+                       return -ENOMEM;
+               }
+               memcpy(p->pe_data, pe_data, pe_data_len);
+               p->pe_data_len = pe_data_len;
+       }
         return 0;
  }
  
  /*
- *      Process received multicast message and create the corresponding
- *      ip_vs_conn entries.
+ *  Connection Add / Update.
+ *  Common for version 0 and 1 reception of backup sync_conns.
+ *  Param: ...
+ *         timeout is in sec.
   */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+                           unsigned int flags, unsigned int state,
+                           unsigned int protocol, unsigned int type,
+                           const union nf_inet_addr *daddr, __be16 dport,
+                           unsigned long timeout, __u32 fwmark,
+                           struct ip_vs_sync_conn_options *opt)
  {
-       struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
-       struct ip_vs_sync_conn *s;
-       struct ip_vs_sync_conn_options *opt;
-       struct ip_vs_conn *cp;
-       struct ip_vs_protocol *pp;
         struct ip_vs_dest *dest;
-       struct ip_vs_conn_param param;
-       char *p;
-       int i;
+       struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs = net_ipvs(net);
  
-       if (buflen < sizeof(struct ip_vs_sync_mesg)) {
-               IP_VS_ERR_RL("sync message header too short\n");
-               return;
-       }
+       if (!(flags & IP_VS_CONN_F_TEMPLATE))
+               cp = ip_vs_conn_in_get(param);
+       else
+               cp = ip_vs_ct_in_get(param);
  
-       /* Convert size back to host byte order */
-       m->size = ntohs(m->size);
+       if (cp && param->pe_data)       /* Free pe_data */
+               kfree(param->pe_data);
+       if (!cp) {
+               /*
+                * Find the appropriate destination for the connection.
+                * If it is not found the connection will remain unbound
+                * but still handled.
+                */
+               dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
+                                      param->vport, protocol, fwmark);
  
-       if (buflen != m->size) {
-               IP_VS_ERR_RL("bogus sync message size\n");
-               return;
+               /*  Set the appropriate activity flag */
+               if (protocol == IPPROTO_TCP) {
+                       if (state != IP_VS_TCP_S_ESTABLISHED)
+                               flags |= IP_VS_CONN_F_INACTIVE;
+                       else
+                               flags &= ~IP_VS_CONN_F_INACTIVE;
+               } else if (protocol == IPPROTO_SCTP) {
+                       if (state != IP_VS_SCTP_S_ESTABLISHED)
+                               flags |= IP_VS_CONN_F_INACTIVE;
+                       else
+                               flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
+               cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+               if (dest)
+                       atomic_dec(&dest->refcnt);
+               if (!cp) {
+                       if (param->pe_data)
+                               kfree(param->pe_data);
+                       IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+                       return;
+               }
+       } else if (!cp->dest) {
+               dest = ip_vs_try_bind_dest(cp);
+               if (dest)
+                       atomic_dec(&dest->refcnt);
+       } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+               (cp->state != state)) {
+               /* update active/inactive flag for the connection */
+               dest = cp->dest;
+               if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                       (state != IP_VS_TCP_S_ESTABLISHED)) {
+                       atomic_dec(&dest->activeconns);
+                       atomic_inc(&dest->inactconns);
+                       cp->flags |= IP_VS_CONN_F_INACTIVE;
+               } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                       (state == IP_VS_TCP_S_ESTABLISHED)) {
+                       atomic_inc(&dest->activeconns);
+                       atomic_dec(&dest->inactconns);
+                       cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
+       } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+               (cp->state != state)) {
+               dest = cp->dest;
+               if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+               (state != IP_VS_SCTP_S_ESTABLISHED)) {
+                       atomic_dec(&dest->activeconns);
+                       atomic_inc(&dest->inactconns);
+                       cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
         }
  
-       /* SyncID sanity check */
-       if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
-               IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
-                         m->syncid);
-               return;
+       if (opt)
+               memcpy(&cp->in_seq, opt, sizeof(*opt));
+       atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+       cp->state = state;
+       cp->old_state = cp->state;
+       /*
+        * For Ver 0 messages style
+        *  - Not possible to recover the right timeout for templates
+        *  - can not find the right fwmark
+        *    virtual service. If needed, we can do it for
+        *    non-fwmark persistent services.
+        * Ver 1 messages style.
+        *  - No problem.
+        */
+       if (timeout) {
+               if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+                       timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+               cp->timeout = timeout*HZ;
+       } else {
+               struct ip_vs_proto_data *pd;
+
+               pd = ip_vs_proto_data_get(net, protocol);
+               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+                       cp->timeout = pd->timeout_table[state];
+               else
+                       cp->timeout = (3*60*HZ);
         }
+       ip_vs_conn_put(cp);
+}
  
-       p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+/*
+ *  Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+                                    const size_t buflen)
+{
+       struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+       struct ip_vs_sync_conn_v0 *s;
+       struct ip_vs_sync_conn_options *opt;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_conn_param param;
+       char *p;
+       int i;
+
+       p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
         for (i=0; i<m->nr_conns; i++) {
                 unsigned flags, state;
  
                 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
-                       IP_VS_ERR_RL("bogus conn in sync message\n");
+                       IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
                         return;
                 }
-               s = (struct ip_vs_sync_conn *) p;
+               s = (struct ip_vs_sync_conn_v0 *) p;
                 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
                 flags &= ~IP_VS_CONN_F_HASHED;
                 if (flags & IP_VS_CONN_F_SEQ_MASK) {
                         opt = (struct ip_vs_sync_conn_options *)&s[1];
                         p += FULL_CONN_SIZE;
                         if (p > buffer+buflen) {
-                               IP_VS_ERR_RL("bogus conn options in sync message\n");
+                               IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
                                 return;
                         }
                 } else {
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
                         pp = ip_vs_proto_get(s->protocol);
                         if (!pp) {
-                               IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
                                         s->protocol);
                                 continue;
                         }
                         if (state >= pp->num_states) {
-                               IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
                                         pp->name, state);
                                 continue;
                         }
                 } else {
                         /* protocol in templates is not used for state/timeout */
-                       pp = NULL;
                         if (state > 0) {
-                               IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
                                         state);
                                 state = 0;
                         }
                 }
  
-               {
-                       if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
-                                             (union nf_inet_addr *)&s->caddr,
-                                             s->cport,
-                                             (union nf_inet_addr *)&s->vaddr,
-                                             s->vport, &param)) {
-                               pr_err("ip_vs_conn_fill_param_sync failed");
-                               return;
+               ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+                                     (const union nf_inet_addr *)&s->caddr,
+                                     s->cport,
+                                     (const union nf_inet_addr *)&s->vaddr,
+                                     s->vport, &param);
+
+               /* Send timeout as Zero */
+               ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+                               (union nf_inet_addr *)&s->daddr, s->dport,
+                               0, 0, opt);
+       }
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+                                   __u32 *opt_flags,
+                                   struct ip_vs_sync_conn_options *opt)
+{
+       struct ip_vs_sync_conn_options *topt;
+
+       topt = (struct ip_vs_sync_conn_options *)p;
+
+       if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+               IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+               return -EINVAL;
+       }
+       if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+               IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+               return -EINVAL;
+       }
+       ntoh_seq(&topt->in_seq, &opt->in_seq);
+       ntoh_seq(&topt->out_seq, &opt->out_seq);
+       *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+       return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+                         __u8 **data, unsigned int maxlen,
+                         __u32 *opt_flags, __u32 flag)
+{
+       if (plen > maxlen) {
+               IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+               return -EINVAL;
+       }
+       if (*opt_flags & flag) {
+               IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+               return -EINVAL;
+       }
+       *data_len = plen;
+       *data = p;
+       *opt_flags |= flag;
+       return 0;
+}
+/*
+ *   Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+{
+       struct ip_vs_sync_conn_options opt;
+       union  ip_vs_sync_conn *s;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_conn_param param;
+       __u32 flags;
+       unsigned int af, state, pe_data_len=0, pe_name_len=0;
+       __u8 *pe_data=NULL, *pe_name=NULL;
+       __u32 opt_flags=0;
+       int retc=0;
+
+       s = (union ip_vs_sync_conn *) p;
+
+       if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+               af = AF_INET6;
+               p += sizeof(struct ip_vs_sync_v6);
+#else
+               IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+               retc = 10;
+               goto out;
+#endif
+       } else if (!s->v4.type) {
+               af = AF_INET;
+               p += sizeof(struct ip_vs_sync_v4);
+       } else {
+               return -10;
+       }
+       if (p > msg_end)
+               return -20;
+
+       /* Process optional params check Type & Len. */
+       while (p < msg_end) {
+               int ptype;
+               int plen;
+
+               if (p+2 > msg_end)
+                       return -30;
+               ptype = *(p++);
+               plen  = *(p++);
+
+               if (!plen || ((p + plen) > msg_end))
+                       return -40;
+               /* Handle seq option  p = param data */
+               switch (ptype & ~IPVS_OPT_F_PARAM) {
+               case IPVS_OPT_SEQ_DATA:
+                       if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+                               return -50;
+                       break;
+
+               case IPVS_OPT_PE_DATA:
+                       if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+                                          IP_VS_PEDATA_MAXLEN, &opt_flags,
+                                          IPVS_OPT_F_PE_DATA))
+                               return -60;
+                       break;
+
+               case IPVS_OPT_PE_NAME:
+                       if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+                                          IP_VS_PENAME_MAXLEN, &opt_flags,
+                                          IPVS_OPT_F_PE_NAME))
+                               return -70;
+                       break;
+
+               default:
+                       /* Param data mandatory ? */
+                       if (!(ptype & IPVS_OPT_F_PARAM)) {
+                               IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+                                         ptype & ~IPVS_OPT_F_PARAM);
+                               retc = 20;
+                               goto out;
                         }
-                       if (!(flags & IP_VS_CONN_F_TEMPLATE))
-                               cp = ip_vs_conn_in_get(&param);
-                       else
-                               cp = ip_vs_ct_in_get(&param);
                 }
-               if (!cp) {
-                       /*
-                        * Find the appropriate destination for the connection.
-                        * If it is not found the connection will remain unbound
-                        * but still handled.
-                        */
-                       dest = ip_vs_find_dest(AF_INET,
-                                              (union nf_inet_addr *)&s->daddr,
-                                              s->dport,
-                                              (union nf_inet_addr *)&s->vaddr,
-                                              s->vport,
-                                              s->protocol);
-                       /*  Set the approprite ativity flag */
-                       if (s->protocol == IPPROTO_TCP) {
-                               if (state != IP_VS_TCP_S_ESTABLISHED)
-                                       flags |= IP_VS_CONN_F_INACTIVE;
-                               else
-                                       flags &= ~IP_VS_CONN_F_INACTIVE;
-                       } else if (s->protocol == IPPROTO_SCTP) {
-                               if (state != IP_VS_SCTP_S_ESTABLISHED)
-                                       flags |= IP_VS_CONN_F_INACTIVE;
-                               else
-                                       flags &= ~IP_VS_CONN_F_INACTIVE;
+               p += plen;  /* Next option */
+       }
+
+       /* Get flags and Mask off unsupported */
+       flags  = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+       flags |= IP_VS_CONN_F_SYNC;
+       state = ntohs(s->v4.state);
+
+       if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+               pp = ip_vs_proto_get(s->v4.protocol);
+               if (!pp) {
+                       IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+                               s->v4.protocol);
+                       retc = 30;
+                       goto out;
+               }
+               if (state >= pp->num_states) {
+                       IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+                               pp->name, state);
+                       retc = 40;
+                       goto out;
+               }
+       } else {
+               /* protocol in templates is not used for state/timeout */
+               if (state > 0) {
+                       IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+                               state);
+                       state = 0;
+               }
+       }
+       if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+                                      pe_data_len, pe_name, pe_name_len)) {
+               retc = 50;
+               goto out;
+       }
+       /* If only IPv4, just silently skip IPv6 */
+       if (af == AF_INET)
+               ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+                               (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+                               ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+                               (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+                               );
+#ifdef CONFIG_IP_VS_IPV6
+       else
+               ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+                               (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+                               ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+                               (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+                               );
+#endif
+       return 0;
+       /* Error exit */
+out:
+       IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+       return retc;
+
+}
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ *      Handles Version 0 & 1
+ */
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+                                 const size_t buflen)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+       __u8 *p, *msg_end;
+       int i, nr_conns;
+
+       if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+               IP_VS_DBG(2, "BACKUP, message header too short\n");
+               return;
+       }
+       /* Convert size back to host byte order */
+       m2->size = ntohs(m2->size);
+
+       if (buflen != m2->size) {
+               IP_VS_DBG(2, "BACKUP, bogus message size\n");
+               return;
+       }
+       /* SyncID sanity check */
+       if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+               IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+               return;
+       }
+       /* Handle version 1 message */
+       if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+           && (m2->spare == 0)) {
+
+               msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+               nr_conns = m2->nr_conns;
+
+               for (i=0; i<nr_conns; i++) {
+                       union ip_vs_sync_conn *s;
+                       unsigned size;
+                       int retc;
+
+                       p = msg_end;
+                       if (p + sizeof(s->v4) > buffer+buflen) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+                               return;
                         }
-                       cp = ip_vs_conn_new(&param,
-                                           (union nf_inet_addr *)&s->daddr,
-                                           s->dport, flags, dest);
-                       if (dest)
-                               atomic_dec(&dest->refcnt);
-                       if (!cp) {
-                               pr_err("ip_vs_conn_new failed\n");
+                       s = (union ip_vs_sync_conn *)p;
+                       size = ntohs(s->v4.ver_size) & SVER_MASK;
+                       msg_end = p + size;
+                       /* Basic sanity checks */
+                       if (msg_end  > buffer+buflen) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
                                 return;
                         }
-               } else if (!cp->dest) {
-                       dest = ip_vs_try_bind_dest(cp);
-                       if (dest)
-                               atomic_dec(&dest->refcnt);
-               } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-                          (cp->state != state)) {
-                       /* update active/inactive flag for the connection */
-                       dest = cp->dest;
-                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                               (state != IP_VS_TCP_S_ESTABLISHED)) {
-                               atomic_dec(&dest->activeconns);
-                               atomic_inc(&dest->inactconns);
-                               cp->flags |= IP_VS_CONN_F_INACTIVE;
-                       } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                               (state == IP_VS_TCP_S_ESTABLISHED)) {
-                               atomic_inc(&dest->activeconns);
-                               atomic_dec(&dest->inactconns);
-                               cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+                                             ntohs(s->v4.ver_size) >> SVER_SHIFT);
+                               return;
                         }
-               } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
-                          (cp->state != state)) {
-                       dest = cp->dest;
-                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                            (state != IP_VS_SCTP_S_ESTABLISHED)) {
-                           atomic_dec(&dest->activeconns);
-                           atomic_inc(&dest->inactconns);
-                           cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       /* Process a single sync_conn */
+                       retc = ip_vs_proc_sync_conn(net, p, msg_end);
+                       if (retc < 0) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+                                            retc);
+                               return;
                         }
+                       /* Make sure we have 32 bit alignment */
+                       msg_end = p + ((size + 3) & ~3);
                 }
-
-               if (opt)
-                       memcpy(&cp->in_seq, opt, sizeof(*opt));
-               atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-               cp->state = state;
-               cp->old_state = cp->state;
-               /*
-                * We can not recover the right timeout for templates
-                * in all cases, we can not find the right fwmark
-                * virtual service. If needed, we can do it for
-                * non-fwmark persistent services.
-                */
-               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-                       cp->timeout = pp->timeout_table[state];
-               else
-                       cp->timeout = (3*60*HZ);
-               ip_vs_conn_put(cp);
+       } else {
+               /* Old type of message */
+               ip_vs_process_message_v0(net, buffer, buflen);
+               return;
         }
  }
  
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  {
         struct net_device *dev;
         struct inet_sock *inet = inet_sk(sk);
+       struct net *net = sock_net(sk);
  
-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev)
                 return -ENODEV;
  
         if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
   *     Set the maximum length of sync message according to the
   *     specified interface's MTU.
   */
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         struct net_device *dev;
         int num;
  
         if (sync_state == IP_VS_STATE_MASTER) {
-               if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+               dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+               if (!dev)
                         return -ENODEV;
  
                 num = (dev->mtu - sizeof(struct iphdr) -
                        sizeof(struct udphdr) -
                        SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-               sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+               ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
                         SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
                 IP_VS_DBG(7, "setting the maximum length of sync sending "
-                         "message %d.\n", sync_send_mesg_maxlen);
+                         "message %d.\n", ipvs->send_mesg_maxlen);
         } else if (sync_state == IP_VS_STATE_BACKUP) {
-               if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+               dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+               if (!dev)
                         return -ENODEV;
  
-               sync_recv_mesg_maxlen = dev->mtu -
+               ipvs->recv_mesg_maxlen = dev->mtu -
                         sizeof(struct iphdr) - sizeof(struct udphdr);
                 IP_VS_DBG(7, "setting the maximum length of sync receiving "
-                         "message %d.\n", sync_recv_mesg_maxlen);
+                         "message %d.\n", ipvs->recv_mesg_maxlen);
         }
  
         return 0;
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
  static int
  join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
  {
+       struct net *net = sock_net(sk);
         struct ip_mreqn mreq;
         struct net_device *dev;
         int ret;
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
         memset(&mreq, 0, sizeof(mreq));
         memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
  
-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev)
                 return -ENODEV;
         if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
                 return -EINVAL;
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
  
  static int bind_mcastif_addr(struct socket *sock, char *ifname)
  {
+       struct net *net = sock_net(sock->sk);
         struct net_device *dev;
         __be32 addr;
         struct sockaddr_in sin;
  
-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev)
                 return -ENODEV;
  
         addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
  /*
   *      Set up sending multicast socket over UDP
   */
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         struct socket *sock;
         int result;
  
@@ -631,7 +1311,7 @@ static struct socket * make_send_sock(void)
                 return ERR_PTR(result);
         }
  
-       result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+       result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
         if (result < 0) {
                 pr_err("Error setting outbound mcast interface\n");
                 goto error;
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void)
         set_mcast_loop(sock->sk, 0);
         set_mcast_ttl(sock->sk, 1);
  
-       result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+       result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
         if (result < 0) {
                 pr_err("Error binding address of the mcast interface\n");
                 goto error;
@@ -664,8 +1344,9 @@ static struct socket * make_send_sock(void)
  /*
   *      Set up receiving multicast socket over UDP
   */
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
         struct socket *sock;
         int result;
  
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void)
         /* join the multicast group */
         result = join_mcast_group(sock->sk,
                         (struct in_addr *) &mcast_addr.sin_addr,
-                       ip_vs_backup_mcast_ifn);
+                       ipvs->backup_mcast_ifn);
         if (result < 0) {
                 pr_err("Error joining to the multicast group\n");
                 goto error;
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
  static int sync_thread_master(void *data)
  {
         struct ip_vs_sync_thread_data *tinfo = data;
+       struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
         struct ip_vs_sync_buff *sb;
  
         pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
                 "syncid = %d\n",
-               ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+               ipvs->master_mcast_ifn, ipvs->master_syncid);
  
         while (!kthread_should_stop()) {
-               while ((sb = sb_dequeue())) {
+               while ((sb = sb_dequeue(ipvs))) {
                         ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
                         ip_vs_sync_buff_release(sb);
                 }
  
-               /* check if entries stay in curr_sb for 2 seconds */
-               sb = get_curr_sync_buff(2 * HZ);
+               /* check if entries stay in ipvs->sync_buff for 2 seconds */
+               sb = get_curr_sync_buff(ipvs, 2 * HZ);
                 if (sb) {
                         ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
                         ip_vs_sync_buff_release(sb);
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data)
         }
  
         /* clean up the sync_buff queue */
-       while ((sb=sb_dequeue())) {
+       while ((sb = sb_dequeue(ipvs)))
                 ip_vs_sync_buff_release(sb);
-       }
  
         /* clean up the current sync_buff */
-       if ((sb = get_curr_sync_buff(0))) {
+       sb = get_curr_sync_buff(ipvs, 0);
+       if (sb)
                 ip_vs_sync_buff_release(sb);
-       }
  
         /* release the sending multicast socket */
         sock_release(tinfo->sock);
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data)
  static int sync_thread_backup(void *data)
  {
         struct ip_vs_sync_thread_data *tinfo = data;
+       struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
         int len;
  
         pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
                 "syncid = %d\n",
-               ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+               ipvs->backup_mcast_ifn, ipvs->backup_syncid);
  
         while (!kthread_should_stop()) {
                 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data)
                 /* do we have data now? */
                 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
                         len = ip_vs_receive(tinfo->sock, tinfo->buf,
-                                       sync_recv_mesg_maxlen);
+                                       ipvs->recv_mesg_maxlen);
                         if (len <= 0) {
                                 pr_err("receiving message error\n");
                                 break;
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data)
                         /* disable bottom half, because it accesses the data
                            shared by softirq while getting/creating conns */
                         local_bh_disable();
-                       ip_vs_process_message(tinfo->buf, len);
+                       ip_vs_process_message(tinfo->net, tinfo->buf, len);
                         local_bh_enable();
                 }
         }
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data)
  }
  
  
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
  {
         struct ip_vs_sync_thread_data *tinfo;
         struct task_struct **realtask, *task;
         struct socket *sock;
+       struct netns_ipvs *ipvs = net_ipvs(net);
         char *name, *buf = NULL;
         int (*threadfn)(void *data);
         int result = -ENOMEM;
  
         IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
         IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
-                 sizeof(struct ip_vs_sync_conn));
+                 sizeof(struct ip_vs_sync_conn_v0));
  
         if (state == IP_VS_STATE_MASTER) {
-               if (sync_master_thread)
+               if (ipvs->master_thread)
                         return -EEXIST;
  
-               strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-                       sizeof(ip_vs_master_mcast_ifn));
-               ip_vs_master_syncid = syncid;
-               realtask = &sync_master_thread;
-               name = "ipvs_syncmaster";
+               strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+                       sizeof(ipvs->master_mcast_ifn));
+               ipvs->master_syncid = syncid;
+               realtask = &ipvs->master_thread;
+               name = "ipvs_master:%d";
                 threadfn = sync_thread_master;
-               sock = make_send_sock();
+               sock = make_send_sock(net);
         } else if (state == IP_VS_STATE_BACKUP) {
-               if (sync_backup_thread)
+               if (ipvs->backup_thread)
                         return -EEXIST;
  
-               strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-                       sizeof(ip_vs_backup_mcast_ifn));
-               ip_vs_backup_syncid = syncid;
-               realtask = &sync_backup_thread;
-               name = "ipvs_syncbackup";
+               strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+                       sizeof(ipvs->backup_mcast_ifn));
+               ipvs->backup_syncid = syncid;
+               realtask = &ipvs->backup_thread;
+               name = "ipvs_backup:%d";
                 threadfn = sync_thread_backup;
-               sock = make_receive_sock();
+               sock = make_receive_sock(net);
         } else {
                 return -EINVAL;
         }
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
                 goto out;
         }
  
-       set_sync_mesg_maxlen(state);
+       set_sync_mesg_maxlen(net, state);
         if (state == IP_VS_STATE_BACKUP) {
-               buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+               buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
                 if (!buf)
                         goto outsocket;
         }
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
         if (!tinfo)
                 goto outbuf;
  
+       tinfo->net = net;
         tinfo->sock = sock;
         tinfo->buf = buf;
  
-       task = kthread_run(threadfn, tinfo, name);
+       task = kthread_run(threadfn, tinfo, name, ipvs->gen);
         if (IS_ERR(task)) {
                 result = PTR_ERR(task);
                 goto outtinfo;
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
  
         /* mark as active */
         *realtask = task;
-       ip_vs_sync_state |= state;
+       ipvs->sync_state |= state;
  
         /* increase the module use count */
         ip_vs_use_count_inc();
@@ -924,16 +1608,18 @@ out:
  }
  
  
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
  {
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
         IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
  
         if (state == IP_VS_STATE_MASTER) {
-               if (!sync_master_thread)
+               if (!ipvs->master_thread)
                         return -ESRCH;
  
                 pr_info("stopping master sync thread %d ...\n",
-                       task_pid_nr(sync_master_thread));
+                       task_pid_nr(ipvs->master_thread));
  
                 /*
                  * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state)
                  * progress of stopping the master sync daemon.
                  */
  
-               spin_lock_bh(&ip_vs_sync_lock);
-               ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-               spin_unlock_bh(&ip_vs_sync_lock);
-               kthread_stop(sync_master_thread);
-               sync_master_thread = NULL;
+               spin_lock_bh(&ipvs->sync_lock);
+               ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+               spin_unlock_bh(&ipvs->sync_lock);
+               kthread_stop(ipvs->master_thread);
+               ipvs->master_thread = NULL;
         } else if (state == IP_VS_STATE_BACKUP) {
-               if (!sync_backup_thread)
+               if (!ipvs->backup_thread)
                         return -ESRCH;
  
                 pr_info("stopping backup sync thread %d ...\n",
-                       task_pid_nr(sync_backup_thread));
+                       task_pid_nr(ipvs->backup_thread));
  
-               ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-               kthread_stop(sync_backup_thread);
-               sync_backup_thread = NULL;
+               ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+               kthread_stop(ipvs->backup_thread);
+               ipvs->backup_thread = NULL;
         } else {
                 return -EINVAL;
         }
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state)
  
         return 0;
  }
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->sync_queue);
+       spin_lock_init(&ipvs->sync_lock);
+       spin_lock_init(&ipvs->sync_buff_lock);
+
+       ipvs->sync_mcast_addr.sin_family = AF_INET;
+       ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+       ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+       return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+       stop_sync_thread(net, IP_VS_STATE_MASTER);
+       stop_sync_thread(net, IP_VS_STATE_BACKUP);
+}
+
+static struct pernet_operations ipvs_sync_ops = {
+       .init = __ip_vs_sync_init,
+       .exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+       return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+       unregister_pernet_subsys(&ipvs_sync_ops);
+}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c

index 5325a3f..1f2a4e3 100644 (file)
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
                         .fl4_tos = RT_TOS(iph->tos),
                         .mark = skb->mark,
                 };
-               struct rtable *rt;
  
                 if (ip_route_output_key(net, &rt, &fl))
                         return 0;
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                 ip_rt_put(rt);
                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                 if (!skb->dev) {
                         struct net *net = dev_net(skb_dst(skb)->dev);
  
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
                                  "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                 if (!skb->dev) {
                         struct net *net = dev_net(skb_dst(skb)->dev);
  
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         df |= (old_iph->frag_off & htons(IP_DF));
  
-       if ((old_iph->frag_off & htons(IP_DF))
-           && mtu < ntohs(old_iph->tot_len)) {
+       if ((old_iph->frag_off & htons(IP_DF) &&
+           mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                 goto tx_error_put;
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
         if (skb_dst(skb))
                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
  
-       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+           !skb_is_gso(skb)) {
                 if (!skb->dev) {
                         struct net *net = dev_net(skb_dst(skb)->dev);
  
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+       if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+           !skb_is_gso(skb)) {
                 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                 ip_rt_put(rt);
                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                 goto tx_error_put;
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
  
         /* MTU checking */
         mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                 if (!skb->dev) {
                         struct net *net = dev_net(skb_dst(skb)->dev);
  
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c

new file mode 100644 (file)

index 0000000..4e99cca
--- /dev/null
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -0,0 +1,82 @@
+/*
+ *      broadcast connection tracking helper
+ *
+ *      (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <net/route.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+int nf_conntrack_broadcast_help(struct sk_buff *skb,
+                               unsigned int protoff,
+                               struct nf_conn *ct,
+                               enum ip_conntrack_info ctinfo,
+                               unsigned int timeout)
+{
+       struct nf_conntrack_expect *exp;
+       struct iphdr *iph = ip_hdr(skb);
+       struct rtable *rt = skb_rtable(skb);
+       struct in_device *in_dev;
+       struct nf_conn_help *help = nfct_help(ct);
+       __be32 mask = 0;
+
+       /* we're only interested in locally generated packets */
+       if (skb->sk == NULL)
+               goto out;
+       if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+               goto out;
+       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+               goto out;
+
+       rcu_read_lock();
+       in_dev = __in_dev_get_rcu(rt->dst.dev);
+       if (in_dev != NULL) {
+               for_primary_ifa(in_dev) {
+                       if (ifa->ifa_broadcast == iph->daddr) {
+                               mask = ifa->ifa_mask;
+                               break;
+                       }
+               } endfor_ifa(in_dev);
+       }
+       rcu_read_unlock();
+
+       if (mask == 0)
+               goto out;
+
+       exp = nf_ct_expect_alloc(ct);
+       if (exp == NULL)
+               goto out;
+
+       exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+       exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
+
+       exp->mask.src.u3.ip       = mask;
+       exp->mask.src.u.udp.port  = htons(0xFFFF);
+
+       exp->expectfn             = NULL;
+       exp->flags                = NF_CT_EXPECT_PERMANENT;
+       exp->class                = NF_CT_EXPECT_CLASS_DEFAULT;
+       exp->helper               = NULL;
+
+       nf_ct_expect_related(exp);
+       nf_ct_expect_put(exp);
+
+       nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+       return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
+
+MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index e615119..1909311 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -43,6 +43,7 @@
  #include <net/netfilter/nf_conntrack_acct.h>
  #include <net/netfilter/nf_conntrack_ecache.h>
  #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
  #include <net/netfilter/nf_nat.h>
  #include <net/netfilter/nf_nat_core.h>
  
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
  static void death_by_timeout(unsigned long ul_conntrack)
  {
         struct nf_conn *ct = (void *)ul_conntrack;
+       struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp && tstamp->stop == 0)
+               tstamp->stop = ktime_to_ns(ktime_get_real());
  
         if (!test_bit(IPS_DYING_BIT, &ct->status) &&
             unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
         struct nf_conntrack_tuple_hash *h;
         struct nf_conn *ct;
         struct nf_conn_help *help;
+       struct nf_conn_tstamp *tstamp;
         struct hlist_nulls_node *n;
         enum ip_conntrack_info ctinfo;
         struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
         ct->timeout.expires += jiffies;
         add_timer(&ct->timeout);
         atomic_inc(&ct->ct_general.use);
-       set_bit(IPS_CONFIRMED_BIT, &ct->status);
+       ct->status |= IPS_CONFIRMED;
+
+       /* set conntrack timestamp, if enabled. */
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp) {
+               if (skb->tstamp.tv64 == 0)
+                       __net_timestamp((struct sk_buff *)skb);
  
+               tstamp->start = ktime_to_ns(skb->tstamp);
+       }
         /* Since the lookup is lockless, hash insertion must be done after
          * starting the timer and setting the CONFIRMED bit. The RCU barriers
          * guarantee that no other CPU can find the conntrack before the above
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
          * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
          */
         memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
-              sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+              offsetof(struct nf_conn, proto) -
+              offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
         spin_lock_init(&ct->lock);
         ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
         ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
         }
  
         nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+       nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
  
         ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
         nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1185,6 +1202,11 @@ struct __nf_ct_flush_report {
  static int kill_report(struct nf_conn *i, void *data)
  {
         struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+       struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(i);
+       if (tstamp && tstamp->stop == 0)
+               tstamp->stop = ktime_to_ns(ktime_get_real());
  
         /* If we fail to deliver the event, death_by_timeout() will retry */
         if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1201,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data)
         return 1;
  }
  
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
  {
-       if (vmalloced)
+       if (is_vmalloc_addr(hash))
                 vfree(hash);
         else
                 free_pages((unsigned long)hash,
@@ -1270,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
                 goto i_see_dead_people;
         }
  
-       nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-                            net->ct.htable_size);
+       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
         nf_conntrack_ecache_fini(net);
         nf_conntrack_acct_fini(net);
         nf_conntrack_expect_fini(net);
@@ -1300,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net)
         }
  }
  
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
  {
         struct hlist_nulls_head *hash;
         unsigned int nr_slots, i;
         size_t sz;
  
-       *vmalloced = 0;
-
         BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
         nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
         sz = nr_slots * sizeof(struct hlist_nulls_head);
         hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
                                         get_order(sz));
         if (!hash) {
-               *vmalloced = 1;
                 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
                 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                                  PAGE_KERNEL);
@@ -1330,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
  
  int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
  {
-       int i, bucket, vmalloced, old_vmalloced;
+       int i, bucket;
         unsigned int hashsize, old_size;
         struct hlist_nulls_head *hash, *old_hash;
         struct nf_conntrack_tuple_hash *h;
@@ -1347,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
         if (!hashsize)
                 return -EINVAL;
  
-       hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+       hash = nf_ct_alloc_hashtable(&hashsize, 1);
         if (!hash)
                 return -ENOMEM;
  
@@ -1369,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
                 }
         }
         old_size = init_net.ct.htable_size;
-       old_vmalloced = init_net.ct.hash_vmalloc;
         old_hash = init_net.ct.hash;
  
         init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-       init_net.ct.hash_vmalloc = vmalloced;
         init_net.ct.hash = hash;
         spin_unlock_bh(&nf_conntrack_lock);
  
-       nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+       nf_ct_free_hashtable(old_hash, old_size);
         return 0;
  }
  EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1490,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net)
         }
  
         net->ct.htable_size = nf_conntrack_htable_size;
-       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
-                                            &net->ct.hash_vmalloc, 1);
+       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
         if (!net->ct.hash) {
                 ret = -ENOMEM;
                 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1503,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net)
         ret = nf_conntrack_acct_init(net);
         if (ret < 0)
                 goto err_acct;
+       ret = nf_conntrack_tstamp_init(net);
+       if (ret < 0)
+               goto err_tstamp;
         ret = nf_conntrack_ecache_init(net);
         if (ret < 0)
                 goto err_ecache;
@@ -1510,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net)
         return 0;
  
  err_ecache:
+       nf_conntrack_tstamp_fini(net);
+err_tstamp:
         nf_conntrack_acct_fini(net);
  err_acct:
         nf_conntrack_expect_fini(net);
  err_expect:
-       nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-                            net->ct.htable_size);
+       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
  err_hash:
         kmem_cache_destroy(net->ct.nf_conntrack_cachep);
  err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c

index a20fb0b..cd1e8e0 100644 (file)
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
         const struct nf_conntrack_expect_policy *p;
         unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
  
-       atomic_inc(&exp->use);
+       /* two references : one for hash insert, one for the timer */
+       atomic_add(2, &exp->use);
  
         if (master_help) {
                 hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
         setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                     (unsigned long)exp);
         if (master_help) {
-               p = &master_help->helper->expect_policy[exp->class];
+               p = &rcu_dereference_protected(
+                               master_help->helper,
+                               lockdep_is_held(&nf_conntrack_lock)
+                               )->expect_policy[exp->class];
                 exp->timeout.expires = jiffies + p->timeout * HZ;
         }
         add_timer(&exp->timeout);
  
-       atomic_inc(&exp->use);
         NF_CT_STAT_INC(net, expect_create);
  }
  
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
         if (!del_timer(&i->timeout))
                 return 0;
  
-       p = &master_help->helper->expect_policy[i->class];
+       p = &rcu_dereference_protected(
+               master_help->helper,
+               lockdep_is_held(&nf_conntrack_lock)
+               )->expect_policy[i->class];
         i->timeout.expires = jiffies + p->timeout * HZ;
         add_timer(&i->timeout);
         return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
         }
         /* Will be over limit? */
         if (master_help) {
-               p = &master_help->helper->expect_policy[expect->class];
+               p = &rcu_dereference_protected(
+                       master_help->helper,
+                       lockdep_is_held(&nf_conntrack_lock)
+                       )->expect_policy[expect->class];
                 if (p->max_expected &&
                     master_help->expecting[expect->class] >= p->max_expected) {
                         evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
         struct hlist_node *n;
  
         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                 if (n)
                         return n;
         }
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
         struct net *net = seq_file_net(seq);
         struct ct_expect_iter_state *st = seq->private;
  
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
         while (head == NULL) {
                 if (++st->bucket >= nf_ct_expect_hsize)
                         return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
         }
         return head;
  }
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
         }
  
         net->ct.expect_count = 0;
-       net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-                                                 &net->ct.expect_vmalloc, 0);
+       net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
         if (net->ct.expect_hash == NULL)
                 goto err1;
  
@@ -653,8 +661,7 @@ err3:
         if (net_eq(net, &init_net))
                 kmem_cache_destroy(nf_ct_expect_cachep);
  err2:
-       nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-                            nf_ct_expect_hsize);
+       nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
  err1:
         return err;
  }
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
                 rcu_barrier(); /* Wait for call_rcu() before destroy */
                 kmem_cache_destroy(nf_ct_expect_cachep);
         }
-       nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-                            nf_ct_expect_hsize);
+       nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
  }
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c

index bd82450..80a23ed 100644 (file)
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
         /* This assumes that extended areas in conntrack for the types
            whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
         for (i = min; i <= max; i++) {
-               t1 = nf_ct_ext_types[i];
+               t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+                               lockdep_is_held(&nf_ct_ext_type_mutex));
                 if (!t1)
                         continue;
  
-               t1->alloc_size = sizeof(struct nf_ct_ext)
-                                + ALIGN(sizeof(struct nf_ct_ext), t1->align)
-                                + t1->len;
+               t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+                                t1->len;
                 for (j = 0; j < NF_CT_EXT_NUM; j++) {
-                       t2 = nf_ct_ext_types[j];
+                       t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+                               lockdep_is_held(&nf_ct_ext_type_mutex));
                         if (t2 == NULL || t2 == t1 ||
                             (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
                                 continue;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c

index 59e1a4c..1bdfea3 100644 (file)
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
  static struct hlist_head *nf_ct_helper_hash __read_mostly;
  static unsigned int nf_ct_helper_hsize __read_mostly;
  static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
  
  
  /* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
         struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
         struct nf_conn_help *help = nfct_help(ct);
  
-       if (help && help->helper == me) {
+       if (help && rcu_dereference_protected(
+                       help->helper,
+                       lockdep_is_held(&nf_conntrack_lock)
+                       ) == me) {
                 nf_conntrack_event(IPCT_HELPER, ct);
                 rcu_assign_pointer(help->helper, NULL);
         }
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
                 hlist_for_each_entry_safe(exp, n, next,
                                           &net->ct.expect_hash[i], hnode) {
                         struct nf_conn_help *help = nfct_help(exp->master);
-                       if ((help->helper == me || exp->helper == me) &&
+                       if ((rcu_dereference_protected(
+                                       help->helper,
+                                       lockdep_is_held(&nf_conntrack_lock)
+                                       ) == me || exp->helper == me) &&
                             del_timer(&exp->timeout)) {
                                 nf_ct_unlink_expect(exp);
                                 nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
         int err;
  
         nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-       nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
-                                                 &nf_ct_helper_vmalloc, 0);
+       nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
         if (!nf_ct_helper_hash)
                 return -ENOMEM;
  
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
         return 0;
  
  err1:
-       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-                            nf_ct_helper_hsize);
+       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
         return err;
  }
  
  void nf_conntrack_helper_fini(void)
  {
         nf_ct_extend_unregister(&helper_extend);
-       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-                            nf_ct_helper_hsize);
+       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
  }
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c

index aadde01..4c8f30a 100644 (file)
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -18,14 +18,7 @@
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
  #include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/route.h>
  
  #include <net/netfilter/nf_conntrack.h>
  #include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
  MODULE_ALIAS_NFCT_HELPER("netbios_ns");
  
  static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, 0400);
+module_param(timeout, uint, S_IRUSR);
  MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
  
-static int help(struct sk_buff *skb, unsigned int protoff,
-               struct nf_conn *ct, enum ip_conntrack_info ctinfo)
-{
-       struct nf_conntrack_expect *exp;
-       struct iphdr *iph = ip_hdr(skb);
-       struct rtable *rt = skb_rtable(skb);
-       struct in_device *in_dev;
-       __be32 mask = 0;
-
-       /* we're only interested in locally generated packets */
-       if (skb->sk == NULL)
-               goto out;
-       if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
-               goto out;
-       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-               goto out;
-
-       rcu_read_lock();
-       in_dev = __in_dev_get_rcu(rt->dst.dev);
-       if (in_dev != NULL) {
-               for_primary_ifa(in_dev) {
-                       if (ifa->ifa_broadcast == iph->daddr) {
-                               mask = ifa->ifa_mask;
-                               break;
-                       }
-               } endfor_ifa(in_dev);
-       }
-       rcu_read_unlock();
-
-       if (mask == 0)
-               goto out;
-
-       exp = nf_ct_expect_alloc(ct);
-       if (exp == NULL)
-               goto out;
-
-       exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-       exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
-       exp->mask.src.u3.ip       = mask;
-       exp->mask.src.u.udp.port  = htons(0xFFFF);
-
-       exp->expectfn             = NULL;
-       exp->flags                = NF_CT_EXPECT_PERMANENT;
-       exp->class                = NF_CT_EXPECT_CLASS_DEFAULT;
-       exp->helper               = NULL;
-
-       nf_ct_expect_related(exp);
-       nf_ct_expect_put(exp);
-
-       nf_ct_refresh(ct, skb, timeout * HZ);
-out:
-       return NF_ACCEPT;
-}
-
  static struct nf_conntrack_expect_policy exp_policy = {
         .max_expected   = 1,
  };
  
+static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
+                  struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+       return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+}
+
  static struct nf_conntrack_helper helper __read_mostly = {
         .name                   = "netbios-ns",
-       .tuple.src.l3num        = AF_INET,
+       .tuple.src.l3num        = NFPROTO_IPV4,
         .tuple.src.u.udp.port   = cpu_to_be16(NMBD_PORT),
         .tuple.dst.protonum     = IPPROTO_UDP,
         .me                     = THIS_MODULE,
-       .help                   = help,
+       .help                   = netbios_ns_help,
         .expect_policy          = &exp_policy,
  };
  
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c

index 93297aa..3fec12c 100644 (file)
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -42,6 +42,7 @@
  #include <net/netfilter/nf_conntrack_tuple.h>
  #include <net/netfilter/nf_conntrack_acct.h>
  #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
  #ifdef CONFIG_NF_NAT_NEEDED
  #include <net/netfilter/nf_nat_core.h>
  #include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
         return -1;
  }
  
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+       struct nlattr *nest_count;
+       const struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(ct);
+       if (!tstamp)
+               return 0;
+
+       nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+       if (!nest_count)
+               goto nla_put_failure;
+
+       NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
+       if (tstamp->stop != 0) {
+               NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
+                            cpu_to_be64(tstamp->stop));
+       }
+       nla_nest_end(skb, nest_count);
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
  #ifdef CONFIG_NF_CONNTRACK_MARK
  static inline int
  ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
             ctnetlink_dump_timeout(skb, ct) < 0 ||
             ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
             ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+           ctnetlink_dump_timestamp(skb, ct) < 0 ||
             ctnetlink_dump_protoinfo(skb, ct) < 0 ||
             ctnetlink_dump_helpinfo(skb, ct) < 0 ||
             ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -470,6 +499,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
  #endif
  }
  
+static inline size_t
+ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+               return 0;
+       return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
+#else
+       return 0;
+#endif
+}
+
  static inline size_t
  ctnetlink_nlmsg_size(const struct nf_conn *ct)
  {
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
                + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
                + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
                + ctnetlink_counters_size(ct)
+              + ctnetlink_timestamp_size(ct)
                + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
                + nla_total_size(0) /* CTA_PROTOINFO */
                + nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
  
         if (events & (1 << IPCT_DESTROY)) {
                 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-                   ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+                   ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+                   ctnetlink_dump_timestamp(skb, ct) < 0)
                         goto nla_put_failure;
         } else {
                 if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -1357,6 +1400,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
         }
  
         nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+       nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
         nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
         /* we must add conntrack extensions before confirmation. */
         ct->status |= IPS_CONFIRMED;
@@ -1375,6 +1419,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
         }
  #endif
  
+       memset(&ct->proto, 0, sizeof(ct->proto));
         if (cda[CTA_PROTOINFO]) {
                 err = ctnetlink_change_protoinfo(ct, cda);
                 if (err < 0)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c

index dc7bb74..5701c8d 100644 (file)
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
  int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
  {
         int ret = 0;
+       struct nf_conntrack_l3proto *old;
  
         if (proto->l3proto >= AF_MAX)
                 return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
                 return -EINVAL;
  
         mutex_lock(&nf_ct_proto_mutex);
-       if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+       old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+                                       lockdep_is_held(&nf_ct_proto_mutex));
+       if (old != &nf_conntrack_l3proto_generic) {
                 ret = -EBUSY;
                 goto out_unlock;
         }
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
         BUG_ON(proto->l3proto >= AF_MAX);
  
         mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+       BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+                                        lockdep_is_held(&nf_ct_proto_mutex)
+                                        ) != proto);
         rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
                            &nf_conntrack_l3proto_generic);
         nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
         mutex_lock(&nf_ct_proto_mutex);
         if (!nf_ct_protos[l4proto->l3proto]) {
                 /* l3proto may be loaded latter. */
-               struct nf_conntrack_l4proto **proto_array;
+               struct nf_conntrack_l4proto __rcu **proto_array;
                 int i;
  
                 proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                 }
  
                 for (i = 0; i < MAX_NF_CT_PROTO; i++)
-                       proto_array[i] = &nf_conntrack_l4proto_generic;
+                       RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
  
                 /* Before making proto_array visible to lockless readers,
                  * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                 smp_wmb();
  
                 nf_ct_protos[l4proto->l3proto] = proto_array;
-       } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
-                                       &nf_conntrack_l4proto_generic) {
+       } else if (rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       lockdep_is_held(&nf_ct_proto_mutex)
+                       ) != &nf_conntrack_l4proto_generic) {
                 ret = -EBUSY;
                 goto out_unlock;
         }
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
         BUG_ON(l4proto->l3proto >= PF_MAX);
  
         mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+       BUG_ON(rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       lockdep_is_held(&nf_ct_proto_mutex)
+                       ) != l4proto);
         rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
                            &nf_conntrack_l4proto_generic);
         nf_ct_l4proto_unregister_sysctl(l4proto);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c

index 5292560..9ae57c5 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
         ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
         ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
         ct->proto.dccp.state = CT_DCCP_NONE;
+       ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+       ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+       ct->proto.dccp.handshake_seq = 0;
         return true;
  
  out_invalid:
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c

index c6049c2..6f4ee70 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
             test_bit(SCTP_CID_COOKIE_ACK, map))
                 return false;
  
+       memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
         new_state = SCTP_CONNTRACK_MAX;
         for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
                 /* Don't need lock here: this conntrack not in circulation yet */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c

index 3fb2b73..6f38d0e 100644 (file)
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
         BUG_ON(th == NULL);
  
         /* Don't need lock here: this conntrack not in circulation yet */
-       new_state
-               = tcp_conntracks[0][get_conntrack_index(th)]
-               [TCP_CONNTRACK_NONE];
+       new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
  
         /* Invalid: delete conntrack */
         if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
         }
  
         if (new_state == TCP_CONNTRACK_SYN_SENT) {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
                 /* SYN packet */
                 ct->proto.tcp.seen[0].td_end =
                         segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                         ct->proto.tcp.seen[0].td_end;
  
                 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-               ct->proto.tcp.seen[1].flags = 0;
         } else if (nf_ct_tcp_loose == 0) {
                 /* Don't try to pick up connections. */
                 return false;
         } else {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
                 /*
                  * We are in the middle of a connection,
                  * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                 ct->proto.tcp.seen[0].td_maxend =
                         ct->proto.tcp.seen[0].td_end +
                         ct->proto.tcp.seen[0].td_maxwin;
-               ct->proto.tcp.seen[0].td_scale = 0;
  
                 /* We assume SACK and liberal window checking to handle
                  * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                                               IP_CT_TCP_FLAG_BE_LIBERAL;
         }
  
-       ct->proto.tcp.seen[1].td_end = 0;
-       ct->proto.tcp.seen[1].td_maxend = 0;
-       ct->proto.tcp.seen[1].td_maxwin = 0;
-       ct->proto.tcp.seen[1].td_scale = 0;
-
         /* tcp_packet will set them */
-       ct->proto.tcp.state = TCP_CONNTRACK_NONE;
         ct->proto.tcp.last_index = TCP_NONE_SET;
  
         pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c

new file mode 100644 (file)

index 0000000..6e545e2
--- /dev/null
+++ b/net/netfilter/nf_conntrack_snmp.c
@@ -0,0 +1,77 @@
+/*
+ *      SNMP service broadcast connection tracking helper
+ *
+ *      (c) 2011 Jiri Olsa <jolsa@redhat.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define SNMP_PORT      161
+
+MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
+MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFCT_HELPER("snmp");
+
+static unsigned int timeout __read_mostly = 30;
+module_param(timeout, uint, S_IRUSR);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+                       unsigned int protoff,
+                       struct nf_conn *ct,
+                       enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
+
+static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+               struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+       typeof(nf_nat_snmp_hook) nf_nat_snmp;
+
+       nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+
+       nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
+       if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
+               return nf_nat_snmp(skb, protoff, ct, ctinfo);
+
+       return NF_ACCEPT;
+}
+
+static struct nf_conntrack_expect_policy exp_policy = {
+       .max_expected   = 1,
+};
+
+static struct nf_conntrack_helper helper __read_mostly = {
+       .name                   = "snmp",
+       .tuple.src.l3num        = NFPROTO_IPV4,
+       .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
+       .tuple.dst.protonum     = IPPROTO_UDP,
+       .me                     = THIS_MODULE,
+       .help                   = snmp_conntrack_help,
+       .expect_policy          = &exp_policy,
+};
+
+static int __init nf_conntrack_snmp_init(void)
+{
+       exp_policy.timeout = timeout;
+       return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_snmp_fini(void)
+{
+       nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_snmp_init);
+module_exit(nf_conntrack_snmp_fini);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c

index b4d7f0f..0ae1428 100644 (file)
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -29,6 +29,8 @@
  #include <net/netfilter/nf_conntrack_helper.h>
  #include <net/netfilter/nf_conntrack_acct.h>
  #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <linux/rculist_nulls.h>
  
  MODULE_LICENSE("GPL");
  
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
  struct ct_iter_state {
         struct seq_net_private p;
         unsigned int bucket;
+       u_int64_t time_now;     /* ns, sampled once in ct_seq_start() */
  };
  
  static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
         for (st->bucket = 0;
              st->bucket < net->ct.htable_size;
              st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                 if (!is_a_nulls(n))
                         return n;
         }
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
         struct net *net = seq_file_net(seq);
         struct ct_iter_state *st = seq->private;
  
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
         while (is_a_nulls(head)) {
                 if (likely(get_nulls_value(head) == st->bucket)) {
                         if (++st->bucket >= net->ct.htable_size)
                                 return NULL;
                 }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                               hlist_nulls_first_rcu(
+                                       &net->ct.hash[st->bucket]));
         }
         return head;
  }
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
  static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
         __acquires(RCU)
  {
+       struct ct_iter_state *st = seq->private;
+
+       /* one wall-clock sample per start; ct_show_delta_time() measures against it */
+       st->time_now = ktime_to_ns(ktime_get_real());
         rcu_read_lock();
         return ct_get_idx(seq, *pos);
  }
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
  }
  #endif
  
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+/*
+ * Append "delta-time=<seconds> " for @ct to the seq_file.
+ *
+ * The age is st->time_now (sampled in ct_seq_start()) minus the start
+ * stamp held in the conntrack's timestamp extension, converted from
+ * nanoseconds to whole seconds.  A non-positive difference prints as 0.
+ * Entries without the extension print nothing.
+ *
+ * Returns 0 when nothing was printed, otherwise the seq_printf() result.
+ */
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+       struct ct_iter_state *st = s->private;
+       struct nf_conn_tstamp *stamp = nf_conn_tstamp_find(ct);
+       s64 age;
+
+       if (!stamp)
+               return 0;
+
+       age = st->time_now - stamp->start;
+       age = (age > 0) ? div_s64(age, NSEC_PER_SEC) : 0;
+
+       return seq_printf(s, "delta-time=%llu ", (unsigned long long)age);
+}
+#else
+/* Timestamp support compiled out: print nothing and return 0. */
+static inline int
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+       return 0;
+}
+#endif
+
  /* return 0 on success, 1 in case of error */
  static int ct_seq_show(struct seq_file *s, void *v)
  {
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
                 goto release;
  #endif
  
+       if (ct_show_delta_time(s, ct))
+               goto release;
+
         if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
                 goto release;
  
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c

new file mode 100644 (file)

index 0000000..af7dd31
--- /dev/null
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+/*
+ * Default for each namespace's ct.sysctl_tstamp; copied in by
+ * nf_conntrack_tstamp_init().  Declared bool so the storage type matches
+ * the "bool" type given to module_param_named() below.
+ */
+static bool nf_ct_tstamp __read_mostly;
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Template sysctl table with the single "nf_conntrack_timestamp" entry.
+ * nf_conntrack_tstamp_init_sysctl() duplicates it per namespace and
+ * repoints .data at that namespace's ct.sysctl_tstamp.
+ */
+static struct ctl_table tstamp_sysctl_table[] = {
+       {
+               .procname       = "nf_conntrack_timestamp",
+               .data           = &init_net.ct.sysctl_tstamp,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {}
+};
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * Conntrack extension descriptor for struct nf_conn_tstamp.  Registered
+ * and unregistered only for init_net (see nf_conntrack_tstamp_init/fini).
+ */
+static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+       .len    = sizeof(struct nf_conn_tstamp),
+       .align  = __alignof__(struct nf_conn_tstamp),
+       .id     = NF_CT_EXT_TSTAMP,
+};
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Register the per-namespace "nf_conntrack_timestamp" sysctl.
+ *
+ * A private copy of tstamp_sysctl_table is made so that .data can point
+ * at this namespace's ct.sysctl_tstamp.  The resulting header is stored
+ * in net->ct.tstamp_sysctl_header.
+ *
+ * Returns 0 on success, -ENOMEM if either the copy or the registration
+ * fails (the copy is freed in the latter case).
+ */
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+       struct ctl_table *tbl;
+
+       tbl = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
+                     GFP_KERNEL);
+       if (!tbl)
+               return -ENOMEM;
+
+       tbl[0].data = &net->ct.sysctl_tstamp;
+
+       net->ct.tstamp_sysctl_header =
+               register_net_sysctl_table(net, nf_net_netfilter_sysctl_path,
+                                         tbl);
+       if (net->ct.tstamp_sysctl_header)
+               return 0;
+
+       printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+       kfree(tbl);
+       return -ENOMEM;
+}
+
+/*
+ * Undo nf_conntrack_tstamp_init_sysctl(): unregister the namespace's
+ * sysctl header and free the table copy that was registered with it.
+ */
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+       struct ctl_table *table;
+
+       /* fetch the table pointer through the header before unregistering it */
+       table = net->ct.tstamp_sysctl_header->ctl_table_arg;
+       unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
+       kfree(table);
+}
+#else
+/* !CONFIG_SYSCTL stub: nothing to register, always returns 0. */
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+       return 0;
+}
+
+/* !CONFIG_SYSCTL stub: nothing to tear down. */
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+/*
+ * Per-namespace setup for conntrack timestamping.
+ *
+ * Seeds net->ct.sysctl_tstamp from the module parameter, registers the
+ * tstamp extension (for init_net only) and then the namespace's sysctl.
+ * If the sysctl step fails for init_net, the extension is unregistered
+ * again.
+ *
+ * Returns 0 on success or the negative error from the failing step.
+ */
+int nf_conntrack_tstamp_init(struct net *net)
+{
+       const int in_init_net = net_eq(net, &init_net);
+       int ret;
+
+       net->ct.sysctl_tstamp = nf_ct_tstamp;
+
+       if (in_init_net) {
+               ret = nf_ct_extend_register(&tstamp_extend);
+               if (ret < 0) {
+                       printk(KERN_ERR "nf_ct_tstamp: Unable to register "
+                                       "extension\n");
+                       return ret;
+               }
+       }
+
+       ret = nf_conntrack_tstamp_init_sysctl(net);
+       if (ret >= 0)
+               return 0;
+
+       /* sysctl setup failed: roll back the extension registration */
+       if (in_init_net)
+               nf_ct_extend_unregister(&tstamp_extend);
+       return ret;
+}
+
+/*
+ * Per-namespace teardown, in reverse order of nf_conntrack_tstamp_init():
+ * the sysctl first, then (for init_net only) the extension type.
+ */
+void nf_conntrack_tstamp_fini(struct net *net)
+{
+       nf_conntrack_tstamp_fini_sysctl(net);
+       if (net_eq(net, &init_net))
+               nf_ct_extend_unregister(&tstamp_extend);
+}
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c

index b07393e..20c775c 100644 (file)
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v)
         struct nf_logger *t;
         int ret;
  
-       logger = nf_loggers[*pos];
+       logger = rcu_dereference_protected(nf_loggers[*pos],
+                                          lockdep_is_held(&nf_log_mutex));
  
         if (!logger)
                 ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
                 mutex_unlock(&nf_log_mutex);
         } else {
                 mutex_lock(&nf_log_mutex);
-               logger = nf_loggers[tindex];
+               logger = rcu_dereference_protected(nf_loggers[tindex],
+                                                  lockdep_is_held(&nf_log_mutex));
                 if (!logger)
                         table->data = "NONE";
                 else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c

index 74aebed..5ab22e2 100644 (file)
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
  int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
  {
         int ret;
+       const struct nf_queue_handler *old;
  
         if (pf >= ARRAY_SIZE(queue_handler))
                 return -EINVAL;
  
         mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] == qh)
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old == qh)
                 ret = -EEXIST;
-       else if (queue_handler[pf])
+       else if (old)
                 ret = -EBUSY;
         else {
                 rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
  /* The caller must flush their queue before this */
  int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
  {
+       const struct nf_queue_handler *old;
+
         if (pf >= ARRAY_SIZE(queue_handler))
                 return -EINVAL;
  
         mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] && queue_handler[pf] != qh) {
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old && old != qh) {
                 mutex_unlock(&queue_handler_mutex);
                 return -EINVAL;
         }
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
  
         mutex_lock(&queue_handler_mutex);
         for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
-               if (queue_handler[pf] == qh)
+               if (rcu_dereference_protected(
+                               queue_handler[pf],
+                               lockdep_is_held(&queue_handler_mutex)
+                               ) == qh)
                         rcu_assign_pointer(queue_handler[pf], NULL);
         }
         mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
                       int (*okfn)(struct sk_buff *),
                       unsigned int queuenum)
  {
-       int status;
+       int status = -ENOENT;
         struct nf_queue_entry *entry = NULL;
  #ifdef CONFIG_BRIDGE_NETFILTER
         struct net_device *physindev;
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb,
         rcu_read_lock();
  
         qh = rcu_dereference(queue_handler[pf]);
-       if (!qh)
+       if (!qh) {
+               status = -ESRCH;
                 goto err_unlock;
+       }
  
         afinfo = nf_get_afinfo(pf);
         if (!afinfo)
                 goto err_unlock;
  
         entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
-       if (!entry)
+       if (!entry) {
+               status = -ENOMEM;
                 goto err_unlock;
+       }
  
         *entry = (struct nf_queue_entry) {
                 .skb    = skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
  
         /* If it's going away, ignore hook. */
         if (!try_module_get(entry->elem->owner)) {
-               rcu_read_unlock();
-               kfree(entry);
-               return 0;
+               status = -ECANCELED;
+               goto err_unlock;
         }
-
         /* Bump dev refs so they don't vanish while packet is out */
         if (indev)
                 dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
                 goto err;
         }
  
-       return 1;
+       return 0;
  
  err_unlock:
         rcu_read_unlock();
  err:
-       kfree_skb(skb);
         kfree(entry);
-       return 1;
+       return status;
  }
  
  int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
              unsigned int queuenum)
  {
         struct sk_buff *segs;
+       int err;
+       unsigned int queued;
  
         if (!skb_is_gso(skb))
                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
         }
  
         segs = skb_gso_segment(skb, 0);
-       kfree_skb(skb);
+       /* Does not use PTR_ERR to limit the number of error codes that can be
+        * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
+        * 'ignore this hook'.
+        */
         if (IS_ERR(segs))
-               return 1;
+               return -EINVAL;
  
+       queued = 0;
+       err = 0;
         do {
                 struct sk_buff *nskb = segs->next;
  
                 segs->next = NULL;
-               if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
-                               queuenum))
+               if (err == 0)
+                       err = __nf_queue(segs, elem, pf, hook, indev,
+                                          outdev, okfn, queuenum);
+               if (err == 0)
+                       queued++;
+               else
                         kfree_skb(segs);
                 segs = nskb;
         } while (segs);
-       return 1;
+
+       /* also free orig skb if only some segments were queued */
+       if (unlikely(err && queued))
+               err = 0;
+       if (err == 0)
+               kfree_skb(skb);
+       return err;
  }
  
  void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
         struct sk_buff *skb = entry->skb;
         struct list_head *elem = &entry->elem->list;
         const struct nf_afinfo *afinfo;
+       int err;
  
         rcu_read_lock();
  
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                 local_bh_enable();
                 break;
         case NF_QUEUE:
-               if (!__nf_queue(skb, elem, entry->pf, entry->hook,
-                               entry->indev, entry->outdev, entry->okfn,
-                               verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               err = __nf_queue(skb, elem, entry->pf, entry->hook,
+                                entry->indev, entry->outdev, entry->okfn,
+                                verdict >> NF_VERDICT_QBITS);
+               if (err < 0) {
+                       if (err == -ECANCELED)
+                               goto next_hook;
+                       if (err == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
                 break;
         case NF_STOLEN:
         default:
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c

index 6a1572b..91592da 100644 (file)
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
  
         for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
                 if (!hlist_empty(&instance_table[st->bucket]))
-                       return rcu_dereference_bh(instance_table[st->bucket].first);
+                       return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
         }
         return NULL;
  }
  
+/*
+ * Return the node after @h, moving on to the first node of the following
+ * buckets when a chain ends; NULL once st->bucket reaches INSTANCE_BUCKETS.
+ */
  static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
  {
-       h = rcu_dereference_bh(h->next);
+       h = rcu_dereference_bh(hlist_next_rcu(h));
         while (!h) {
                 if (++st->bucket >= INSTANCE_BUCKETS)
                         return NULL;
  
-               h = rcu_dereference_bh(instance_table[st->bucket].first);
+               h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
         }
         return h;
  }
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c

index 68e67d1..b83123f 100644 (file)
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
  {
         struct sk_buff *nskb;
         struct nfqnl_instance *queue;
-       int err;
+       int err = -ENOBUFS;
  
         /* rcu_read_lock()ed by nf_hook_slow() */
         queue = instance_lookup(queuenum);
-       if (!queue)
+       if (!queue) {
+               err = -ESRCH;
                 goto err_out;
+       }
  
-       if (queue->copy_mode == NFQNL_COPY_NONE)
+       if (queue->copy_mode == NFQNL_COPY_NONE) {
+               err = -EINVAL;
                 goto err_out;
+       }
  
         nskb = nfqnl_build_packet_message(queue, entry);
-       if (nskb == NULL)
+       if (nskb == NULL) {
+               err = -ENOMEM;
                 goto err_out;
-
+       }
         spin_lock_bh(&queue->lock);
  
-       if (!queue->peer_pid)
+       if (!queue->peer_pid) {
+               err = -EINVAL;
                 goto err_out_free_nskb;
-
+       }
         if (queue->queue_total >= queue->queue_maxlen) {
                 queue->queue_dropped++;
                 if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
  err_out_unlock:
         spin_unlock_bh(&queue->lock);
  err_out:
-       return -1;
+       return err;
  }
  
  static int
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c

index c942376..0a77d2f 100644 (file)
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -23,6 +23,7 @@
  #include <linux/mutex.h>
  #include <linux/mm.h>
  #include <linux/slab.h>
+#include <linux/audit.h>
  #include <net/net_namespace.h>
  
  #include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
  #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
  
  struct compat_delta {
-       struct compat_delta *next;
-       unsigned int offset;
-       int delta;
+       unsigned int offset; /* offset in kernel */
+       int delta; /* running total of deltas up to and including this entry (see xt_compat_add_offset) */
  };
  
  struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
         struct list_head target;
  #ifdef CONFIG_COMPAT
         struct mutex compat_mutex;
-       struct compat_delta *compat_offsets;
+       struct compat_delta *compat_tab;
+       unsigned int number; /* number of slots in compat_tab[] */
+       unsigned int cur; /* number of used slots in compat_tab[] */
  #endif
  };
  
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par,
  EXPORT_SYMBOL_GPL(xt_check_match);
  
  #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+/*
+ * Append an (offset, delta) record to the compat table of family @af.
+ *
+ * The table is vmalloc'ed on first use with xt[af].number slots, so
+ * xt_compat_init_offsets() must have set a non-zero slot count first.
+ * The stored delta is cumulative: @delta plus the previous entry's stored
+ * value.
+ *
+ * Returns 0 on success, -EINVAL if no slot count was set or the table is
+ * full, -ENOMEM if the table cannot be allocated.
+ */
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
  {
-       struct compat_delta *tmp;
+       struct xt_af *xp = &xt[af];
  
-       tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
-       if (!tmp)
-               return -ENOMEM;
+       if (!xp->compat_tab) {
+               if (!xp->number)
+                       return -EINVAL;
+               xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
+               if (!xp->compat_tab)
+                       return -ENOMEM;
+               xp->cur = 0;
+       }
  
-       tmp->offset = offset;
-       tmp->delta = delta;
+       if (xp->cur >= xp->number)
+               return -EINVAL;
  
-       if (xt[af].compat_offsets) {
-               tmp->next = xt[af].compat_offsets->next;
-               xt[af].compat_offsets->next = tmp;
-       } else {
-               xt[af].compat_offsets = tmp;
-               tmp->next = NULL;
-       }
+       /* keep a running sum so a lookup needs only one table read */
+       if (xp->cur)
+               delta += xp->compat_tab[xp->cur - 1].delta;
+       xp->compat_tab[xp->cur].offset = offset;
+       xp->compat_tab[xp->cur].delta = delta;
+       xp->cur++;
         return 0;
  }
  EXPORT_SYMBOL_GPL(xt_compat_add_offset);
  
  void xt_compat_flush_offsets(u_int8_t af)
  {
-       struct compat_delta *tmp, *next;
-
-       if (xt[af].compat_offsets) {
-               for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
-                       next = tmp->next;
-                       kfree(tmp);
-               }
-               xt[af].compat_offsets = NULL;
+       if (xt[af].compat_tab) {
+               vfree(xt[af].compat_tab);
+               xt[af].compat_tab = NULL;
+               xt[af].number = 0;
         }
  }
  EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
  
+/*
+ * Look up @offset in the compat table of family @af by binary search.
+ *
+ * On an exact match, returns the cumulative delta stored in the entry
+ * before the match (0 when the match is the first entry).  If no entry
+ * has that offset, warns once and returns 0.
+ *
+ * NOTE(review): the search assumes entries were added in ascending offset
+ * order; nothing in this function checks that.
+ */
  int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
  {
-       struct compat_delta *tmp;
-       int delta;
-
-       for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
-               if (tmp->offset < offset)
-                       delta += tmp->delta;
-       return delta;
+       struct compat_delta *tmp = xt[af].compat_tab;
+       int mid, left = 0, right = xt[af].cur - 1;
+
+       while (left <= right) {
+               mid = (left + right) >> 1;
+               if (offset > tmp[mid].offset)
+                       left = mid + 1;
+               else if (offset < tmp[mid].offset)
+                       right = mid - 1;
+               else
+                       return mid ? tmp[mid - 1].delta : 0;
+       }
+       WARN_ON_ONCE(1);
+       return 0;
  }
  EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
  
+/*
+ * Set the slot count the compat table of family @af will be allocated
+ * with and reset the used-slot counter.  The table itself is allocated by
+ * the first xt_compat_add_offset() call.
+ */
+void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+{
+       xt[af].number = number;
+       xt[af].cur = 0;
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
  int xt_compat_match_offset(const struct xt_match *match)
  {
         u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table,
          */
         local_bh_enable();
  
+#ifdef CONFIG_AUDIT
+       if (audit_enabled) {
+               struct audit_buffer *ab;
+
+               ab = audit_log_start(current->audit_context, GFP_KERNEL,
+                                    AUDIT_NETFILTER_CFG);
+               if (ab) {
+                       audit_log_format(ab, "table=%s family=%u entries=%u",
+                                        table->name, table->af,
+                                        private->number);
+                       audit_log_end(ab);
+               }
+       }
+#endif
+
         return private;
  }
  EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1368,7 @@ static int __init xt_init(void)
                 mutex_init(&xt[i].mutex);
  #ifdef CONFIG_COMPAT
                 mutex_init(&xt[i].compat_mutex);
-               xt[i].compat_offsets = NULL;
+               xt[i].compat_tab = NULL;
  #endif
                 INIT_LIST_HEAD(&xt[i].target);
                 INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c

new file mode 100644 (file)

index 0000000..81802d2
--- /dev/null
+++ b/net/netfilter/xt_AUDIT.c
@@ -0,0 +1,204 @@
+/*
+ * Creates audit record for dropped/accepted packets
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/audit.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_AUDIT.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
+MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
+MODULE_ALIAS("ipt_AUDIT");
+MODULE_ALIAS("ip6t_AUDIT");
+MODULE_ALIAS("ebt_AUDIT");
+MODULE_ALIAS("arpt_AUDIT");
+
+/*
+ * Append transport-level fields for protocol @proto, whose header starts
+ * @offset bytes into @skb.
+ *
+ * TCP, UDP and UDP-Lite log the first two 16-bit words as sport/dport.
+ * ICMP and ICMPv6 log the first two bytes as icmptype/icmpcode.  Other
+ * protocols log nothing.  If the bytes cannot be obtained, " truncated=1"
+ * is logged instead.
+ */
+static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
+                       unsigned int proto, unsigned int offset)
+{
+       if (proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
+           proto == IPPROTO_UDPLITE) {
+               __be16 portbuf[2];
+               const __be16 *ports;
+
+               ports = skb_header_pointer(skb, offset, sizeof(portbuf),
+                                          portbuf);
+               if (ports == NULL) {
+                       audit_log_format(ab, " truncated=1");
+                       return;
+               }
+
+               audit_log_format(ab, " sport=%hu dport=%hu",
+                                ntohs(ports[0]), ntohs(ports[1]));
+       } else if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
+               u8 icmpbuf[2];
+               const u8 *icmp;
+
+               icmp = skb_header_pointer(skb, offset, sizeof(icmpbuf),
+                                         &icmpbuf);
+               if (icmp == NULL) {
+                       audit_log_format(ab, " truncated=1");
+                       return;
+               }
+
+               audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
+                                icmp[0], icmp[1]);
+       }
+}
+
+/*
+ * Append IPv4 fields (addresses, IP id, protocol) for @skb and, for
+ * packets with a zero fragment offset, the transport fields.
+ *
+ * The header is read from offset 0 of the skb data.
+ * NOTE(review): audit_ip6() uses skb_network_offset() instead; this
+ * assumes the data pointer already sits at the IP header -- confirm.
+ */
+static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+       struct iphdr hdrbuf;
+       const struct iphdr *iph;
+
+       iph = skb_header_pointer(skb, 0, sizeof(hdrbuf), &hdrbuf);
+       if (!iph) {
+               audit_log_format(ab, " truncated=1");
+               return;
+       }
+
+       audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
+               &iph->saddr, &iph->daddr, ntohs(iph->id), iph->protocol);
+
+       /* only a packet with fragment offset 0 has the transport header here */
+       if (!(ntohs(iph->frag_off) & IP_OFFSET)) {
+               audit_proto(ab, skb, iph->protocol, iph->ihl * 4);
+               return;
+       }
+
+       audit_log_format(ab, " frag=1");
+}
+
+/*
+ * Append IPv6 fields (addresses, upper-layer protocol) for @skb, followed
+ * by the transport fields when the upper-layer header could be located.
+ *
+ * Extension headers are skipped with ipv6_skip_exthdr(); the protocol
+ * logged is the one found after them.  " truncated=1" is logged when the
+ * fixed IPv6 header itself cannot be read.
+ */
+static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+       struct ipv6hdr _ip6h;
+       const struct ipv6hdr *ih;
+       u8 nexthdr;
+       int offset;
+
+       ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+       if (!ih) {
+               audit_log_format(ab, " truncated=1");
+               return;
+       }
+
+       nexthdr = ih->nexthdr;
+       offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
+                                 &nexthdr);
+
+       audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+                        &ih->saddr, &ih->daddr, nexthdr);
+
+       /*
+        * ipv6_skip_exthdr() signals failure with a negative value, which
+        * must not be used as a header offset: only a positive result
+        * points at a transport header.
+        */
+       if (offset > 0)
+               audit_proto(ab, skb, nexthdr, offset);
+}
+
+/*
+ * AUDIT target: emit one AUDIT_NETFILTER_PKT record describing @skb.
+ *
+ * The record carries the configured action type, hook number, packet
+ * length and interface names, then optionally the mark, the Ethernet
+ * addresses, and the IPv4/IPv6 fields.  If no audit buffer can be
+ * obtained the packet is simply not logged.
+ *
+ * Always returns XT_CONTINUE.
+ */
+static unsigned int
+audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct xt_audit_info *info = par->targinfo;
+       struct audit_buffer *ab;
+
+       ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+       if (ab == NULL)
+               goto errout;
+
+       /* "?" stands in for an interface that is not set */
+       audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
+                        info->type, par->hooknum, skb->len,
+                        par->in ? par->in->name : "?",
+                        par->out ? par->out->name : "?");
+
+       /* the mark is logged only when non-zero */
+       if (skb->mark)
+               audit_log_format(ab, " mark=%#x", skb->mark);
+
+       if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+               audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
+                                eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+                                ntohs(eth_hdr(skb)->h_proto));
+
+               /* bridge rules: pick the L3 decoder from the ethertype */
+               if (par->family == NFPROTO_BRIDGE) {
+                       switch (eth_hdr(skb)->h_proto) {
+                       case __constant_htons(ETH_P_IP):
+                               audit_ip4(ab, skb);
+                               break;
+
+                       case __constant_htons(ETH_P_IPV6):
+                               audit_ip6(ab, skb);
+                               break;
+                       }
+               }
+       }
+
+       /* IPv4/IPv6 rules: the family itself selects the L3 decoder */
+       switch (par->family) {
+       case NFPROTO_IPV4:
+               audit_ip4(ab, skb);
+               break;
+
+       case NFPROTO_IPV6:
+               audit_ip6(ab, skb);
+               break;
+       }
+
+       audit_log_end(ab);
+
+errout:
+       return XT_CONTINUE;
+}
+
+/*
+ * Rule validation: accept the target only if its audit type does not
+ * exceed XT_AUDIT_TYPE_MAX.  Returns 0 when valid, -ERANGE otherwise.
+ */
+static int audit_tg_check(const struct xt_tgchk_param *par)
+{
+       const struct xt_audit_info *info = par->targinfo;
+
+       if (info->type <= XT_AUDIT_TYPE_MAX)
+               return 0;
+
+       pr_info("Audit type out of range (valid range: 0..%hhu)\n",
+               XT_AUDIT_TYPE_MAX);
+       return -ERANGE;
+}
+
+/*
+ * Target registration: "AUDIT" under NFPROTO_UNSPEC, with audit_tg() as
+ * the packet handler and audit_tg_check() validating rule data.
+ */
+static struct xt_target audit_tg_reg __read_mostly = {
+       .name           = "AUDIT",
+       .family         = NFPROTO_UNSPEC,
+       .target         = audit_tg,
+       .targetsize     = sizeof(struct xt_audit_info),
+       .checkentry     = audit_tg_check,
+       .me             = THIS_MODULE,
+};
+
+/* Module entry point: register the AUDIT target; returns the result. */
+static int __init audit_tg_init(void)
+{
+       return xt_register_target(&audit_tg_reg);
+}
+
+/* Module exit: unregister the AUDIT target. */
+static void __exit audit_tg_exit(void)
+{
+       xt_unregister_target(&audit_tg_reg);
+}
+
+module_init(audit_tg_init);
+module_exit(audit_tg_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c

index c2c0e4a..af9c4da 100644 (file)
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -19,12 +19,14 @@
  #include <linux/netfilter_ipv6.h>
  #include <linux/netfilter/x_tables.h>
  #include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
  
  MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
  MODULE_LICENSE("GPL");
  MODULE_DESCRIPTION("Xtables: Qdisc classification");
  MODULE_ALIAS("ipt_CLASSIFY");
  MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
  
  static unsigned int
  classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
         return XT_CONTINUE;
  }
  
-static struct xt_target classify_tg_reg __read_mostly = {
-       .name       = "CLASSIFY",
-       .revision   = 0,
-       .family     = NFPROTO_UNSPEC,
-       .table      = "mangle",
-       .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-                     (1 << NF_INET_POST_ROUTING),
-       .target     = classify_tg,
-       .targetsize = sizeof(struct xt_classify_target_info),
-       .me         = THIS_MODULE,
+/*
+ * Two registrations of the same classify_tg() handler: one under
+ * NFPROTO_UNSPEC limited to the NF_INET output/forward/postrouting hooks,
+ * and one for NFPROTO_ARP limited to the ARP out/forward hooks.  Neither
+ * entry sets .table.
+ */
+static struct xt_target classify_tg_reg[] __read_mostly = {
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+               .family     = NFPROTO_UNSPEC,
+               .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+                             (1 << NF_INET_POST_ROUTING),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+               .family     = NFPROTO_ARP,
+               .hooks      = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
  };
  
  static int __init classify_tg_init(void)
  {
-       return xt_register_target(&classify_tg_reg);
+       /* register every entry of the classify_tg_reg[] array */
+       return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
  }
  
  static void __exit classify_tg_exit(void)
  {
-       xt_unregister_target(&classify_tg_reg);
+       /* unregister every entry of the classify_tg_reg[] array */
+       xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
  }
  
  module_init(classify_tg_init);
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c

index be1f22e..3bdd443 100644 (file)
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
  MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
  MODULE_DESCRIPTION("Xtables: idle time monitor");
  MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c

index a414050..993de2b 100644 (file)
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
  MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
  
  static LIST_HEAD(xt_led_triggers);
  static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c

index 039cce1..d4f4b5d 100644 (file)
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
  
         if (info->queues_total > 1) {
                 if (par->family == NFPROTO_IPV4)
-                       queue = hash_v4(skb) % info->queues_total + queue;
+                       queue = (((u64) hash_v4(skb) * info->queues_total) >>
+                                32) + queue;
  #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
                 else if (par->family == NFPROTO_IPV6)
-                       queue = hash_v6(skb) % info->queues_total + queue;
+                       queue = (((u64) hash_v6(skb) * info->queues_total) >>
+                                32) + queue;
  #endif
         }
         return NF_QUEUE_NR(queue);
  }
  
-static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
  {
-       const struct xt_NFQ_info_v1 *info = par->targinfo;
+       const struct xt_NFQ_info_v2 *info = par->targinfo;
+       unsigned int ret = nfqueue_tg_v1(skb, par);
+
+       if (info->bypass)
+               ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+       return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+       const struct xt_NFQ_info_v2 *info = par->targinfo;
         u32 maxid;
  
         if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
                        info->queues_total, maxid);
                 return -ERANGE;
         }
+       if (par->target->revision == 2 && info->bypass > 1)
+               return -EINVAL;
         return 0;
  }
  
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
                 .name           = "NFQUEUE",
                 .revision       = 1,
                 .family         = NFPROTO_UNSPEC,
-               .checkentry     = nfqueue_tg_v1_check,
+               .checkentry     = nfqueue_tg_check,
                 .target         = nfqueue_tg_v1,
                 .targetsize     = sizeof(struct xt_NFQ_info_v1),
                 .me             = THIS_MODULE,
         },
+       {
+               .name           = "NFQUEUE",
+               .revision       = 2,
+               .family         = NFPROTO_UNSPEC,
+               .checkentry     = nfqueue_tg_check,
+               .target         = nfqueue_tg_v2,
+               .targetsize     = sizeof(struct xt_NFQ_info_v2),
+               .me             = THIS_MODULE,
+       },
  };
  
  static int __init nfqueue_tg_init(void)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c

index 5c5b6b9..7fd3fd5 100644 (file)
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -193,10 +193,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
  
         if (par->family == NFPROTO_IPV6) {
                 const struct ipv6hdr *iph = ipv6_hdr(skb);
-               memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
+               memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
+                      &iph->daddr : &iph->saddr, sizeof(addr.ip6));
         } else {
                 const struct iphdr *iph = ip_hdr(skb);
-               addr.ip = iph->saddr;
+               addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+                         iph->daddr : iph->saddr;
         }
  
         spin_lock_bh(&info->data->lock);
@@ -204,13 +206,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
                                  &info->mask, par->family);
         spin_unlock_bh(&info->data->lock);
  
-       if (connections < 0) {
+       if (connections < 0)
                 /* kmalloc failed, drop it entirely */
-               par->hotdrop = true;
-               return false;
-       }
+               goto hotdrop;
  
-       return (connections > info->limit) ^ info->inverse;
+       return (connections > info->limit) ^
+              !!(info->flags & XT_CONNLIMIT_INVERT);
  
   hotdrop:
         par->hotdrop = true;
@@ -268,25 +269,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
         kfree(info->data);
  }
  
-static struct xt_match connlimit_mt_reg __read_mostly = {
-       .name       = "connlimit",
-       .revision   = 0,
-       .family     = NFPROTO_UNSPEC,
-       .checkentry = connlimit_mt_check,
-       .match      = connlimit_mt,
-       .matchsize  = sizeof(struct xt_connlimit_info),
-       .destroy    = connlimit_mt_destroy,
-       .me         = THIS_MODULE,
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
+       {
+               .name       = "connlimit",
+               .revision   = 0,
+               .family     = NFPROTO_UNSPEC,
+               .checkentry = connlimit_mt_check,
+               .match      = connlimit_mt,
+               .matchsize  = sizeof(struct xt_connlimit_info),
+               .destroy    = connlimit_mt_destroy,
+               .me         = THIS_MODULE,
+       },
+       {
+               .name       = "connlimit",
+               .revision   = 1,
+               .family     = NFPROTO_UNSPEC,
+               .checkentry = connlimit_mt_check,
+               .match      = connlimit_mt,
+               .matchsize  = sizeof(struct xt_connlimit_info),
+               .destroy    = connlimit_mt_destroy,
+               .me         = THIS_MODULE,
+       },
  };
  
  static int __init connlimit_mt_init(void)
  {
-       return xt_register_match(&connlimit_mt_reg);
+       return xt_register_matches(connlimit_mt_reg,
+              ARRAY_SIZE(connlimit_mt_reg));
  }
  
  static void __exit connlimit_mt_exit(void)
  {
-       xt_unregister_match(&connlimit_mt_reg);
+       xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
  }
  
  module_init(connlimit_mt_init);
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c

index e536710..4ef1b63 100644 (file)
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
         return true;
  }
  
+static inline bool
+port_match(u16 min, u16 max, u16 port, bool invert)
+{
+       return (port >= min && port <= max) ^ invert;
+}
+
+static inline bool
+ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
+                      const struct nf_conn *ct)
+{
+       const struct nf_conntrack_tuple *tuple;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+       if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+           (nf_ct_protonum(ct) == info->l4proto) ^
+           !(info->invert_flags & XT_CONNTRACK_PROTO))
+               return false;
+
+       /* Shortcut to match all recognized protocols by using ->src.all. */
+       if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+           !port_match(info->origsrc_port, info->origsrc_port_high,
+                       ntohs(tuple->src.u.all),
+                       info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+               return false;
+
+       if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+           !port_match(info->origdst_port, info->origdst_port_high,
+                       ntohs(tuple->dst.u.all),
+                       info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+               return false;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+       if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+           !port_match(info->replsrc_port, info->replsrc_port_high,
+                       ntohs(tuple->src.u.all),
+                       info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+               return false;
+
+       if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+           !port_match(info->repldst_port, info->repldst_port_high,
+                       ntohs(tuple->dst.u.all),
+                       info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+               return false;
+
+       return true;
+}
+
  static bool
  conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
               u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
                     !(info->invert_flags & XT_CONNTRACK_REPLDST))
                         return false;
  
-       if (!ct_proto_port_check(info, ct))
-               return false;
+       if (par->match->revision != 3) {
+               if (!ct_proto_port_check(info, ct))
+                       return false;
+       } else {
+               if (!ct_proto_port_check_v3(par->matchinfo, ct))
+                       return false;
+       }
  
         if ((info->match_flags & XT_CONNTRACK_STATUS) &&
             (!!(status_mask & ct->status) ^
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
         return conntrack_mt(skb, par, info->state_mask, info->status_mask);
  }
  
+static bool
+conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
+
+       return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
  static int conntrack_mt_check(const struct xt_mtchk_param *par)
  {
         int ret;
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
                 .destroy    = conntrack_mt_destroy,
                 .me         = THIS_MODULE,
         },
+       {
+               .name       = "conntrack",
+               .revision   = 3,
+               .family     = NFPROTO_UNSPEC,
+               .matchsize  = sizeof(struct xt_conntrack_mtinfo3),
+               .match      = conntrack_mt_v3,
+               .checkentry = conntrack_mt_check,
+               .destroy    = conntrack_mt_destroy,
+               .me         = THIS_MODULE,
+       },
  };
  
  static int __init conntrack_mt_init(void)
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c

index b39db8a..c7a2e54 100644 (file)
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
  MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
  
  static int cpu_mt_check(const struct xt_mtchk_param *par)
  {
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c

index 9127a3d..bb10b07 100644 (file)
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
         /*
          * Check if the packet belongs to an existing entry
          */
-       cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+       cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
         if (unlikely(cp == NULL)) {
                 match = false;
                 goto out;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c

index 91cb1d7..c60649e 100644 (file)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -164,7 +164,6 @@ struct packet_mreq_max {
  static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
                 int closing, int tx_ring);
  
-#define PGV_FROM_VMALLOC 1
  struct pgv {
         char *buffer;
  };
@@ -523,11 +522,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
  {
         struct sk_filter *filter;
  
-       rcu_read_lock_bh();
-       filter = rcu_dereference_bh(sk->sk_filter);
+       rcu_read_lock();
+       filter = rcu_dereference(sk->sk_filter);
         if (filter != NULL)
                 res = sk_run_filter(skb, filter->insns);
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
  
         return res;
  }
diff --git a/net/rds/rds.h b/net/rds/rds.h

index 9542449..da8adac 100644 (file)
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
  #define RDS_FRAG_SIZE  ((unsigned int)(1 << RDS_FRAG_SHIFT))
  
  #define RDS_CONG_MAP_BYTES     (65536 / 8)
-#define RDS_CONG_MAP_LONGS     (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
  #define RDS_CONG_MAP_PAGES     (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
  #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
  
diff --git a/net/sched/Kconfig b/net/sched/Kconfig

index f04d4a4..e318f45 100644 (file)
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -205,6 +205,18 @@ config NET_SCH_DRR
  
           If unsure, say N.
  
+config NET_SCH_MQPRIO
+       tristate "Multi-queue priority scheduler (MQPRIO)"
+       help
+         Say Y here if you want to use the Multi-queue Priority scheduler.
+         This scheduler allows QOS to be offloaded on NICs that have support
+         for offloading QOS schedulers.
+
+         To compile this driver as a module, choose M here: the module will
+         be called sch_mqprio.
+
+         If unsure, say N.
+
  config NET_SCH_INGRESS
         tristate "Ingress Qdisc"
         depends on NET_CLS_ACT
@@ -243,7 +255,7 @@ config NET_CLS_TCINDEX
  
  config NET_CLS_ROUTE4
         tristate "Routing decision (ROUTE)"
-       select NET_CLS_ROUTE
+       select IP_ROUTE_CLASSID
         select NET_CLS
         ---help---
           If you say Y here, you will be able to classify packets
@@ -252,9 +264,6 @@ config NET_CLS_ROUTE4
           To compile this code as a module, choose M here: the
           module will be called cls_route.
  
-config NET_CLS_ROUTE
-       bool
-
  config NET_CLS_FW
         tristate "Netfilter mark (FW)"
         select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile

index 960f5db..26ce681 100644 (file)
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)  += sch_multiq.o
  obj-$(CONFIG_NET_SCH_ATM)      += sch_atm.o
  obj-$(CONFIG_NET_SCH_NETEM)    += sch_netem.o
  obj-$(CONFIG_NET_SCH_DRR)      += sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
  obj-$(CONFIG_NET_CLS_U32)      += cls_u32.o
  obj-$(CONFIG_NET_CLS_ROUTE4)   += cls_route.o
  obj-$(CONFIG_NET_CLS_FW)       += cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c

index 23b25f8..15873e1 100644 (file)
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
                            struct tc_action *a, struct tcf_hashinfo *hinfo)
  {
         struct tcf_common *p;
-       int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+       int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
         struct nlattr *nest;
  
         read_lock_bh(hinfo->lock);
@@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
  {
         struct tcf_common *p, *s_p;
         struct nlattr *nest;
-       int i= 0, n_i = 0;
+       int i = 0, n_i = 0;
  
         nest = nla_nest_start(skb, a->order);
         if (nest == NULL)
@@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
                 while (p != NULL) {
                         s_p = p->tcfc_next;
                         if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
-                                module_put(a->ops->owner);
+                               module_put(a->ops->owner);
                         n_i++;
                         p = s_p;
                 }
@@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
         nest = nla_nest_start(skb, TCA_OPTIONS);
         if (nest == NULL)
                 goto nla_put_failure;
-       if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+       err = tcf_action_dump_old(skb, a, bind, ref);
+       if (err > 0) {
                 nla_nest_end(skb, nest);
                 return err;
         }
@@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
         struct tc_action *a;
         struct tc_action_ops *a_o;
         char act_name[IFNAMSIZ];
-       struct nlattr *tb[TCA_ACT_MAX+1];
+       struct nlattr *tb[TCA_ACT_MAX + 1];
         struct nlattr *kind;
         int err;
  
@@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
                 goto err_free;
  
         /* module count goes up only when brand new policy is created
-          if it exists and is only bound to in a_o->init() then
-          ACT_P_CREATED is not returned (a zero is).
-       */
+        * if it exists and is only bound to in a_o->init() then
+        * ACT_P_CREATED is not returned (a zero is).
+        */
         if (err != ACT_P_CREATED)
                 module_put(a_o->owner);
         a->ops = a_o;
@@ -569,7 +570,7 @@ err_out:
  struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
                                   char *name, int ovr, int bind)
  {
-       struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+       struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
         struct tc_action *head = NULL, *act, *act_prev = NULL;
         int err;
         int i;
@@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
  static struct tc_action *
  tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
  {
-       struct nlattr *tb[TCA_ACT_MAX+1];
+       struct nlattr *tb[TCA_ACT_MAX + 1];
         struct tc_action *a;
         int index;
         int err;
@@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
         struct tcamsg *t;
         struct netlink_callback dcb;
         struct nlattr *nest;
-       struct nlattr *tb[TCA_ACT_MAX+1];
+       struct nlattr *tb[TCA_ACT_MAX + 1];
         struct nlattr *kind;
         struct tc_action *a = create_a(0);
         int err = -ENOMEM;
@@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
         nlh->nlmsg_flags |= NLM_F_ROOT;
         module_put(a->ops->owner);
         kfree(a);
-       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+                            n->nlmsg_flags & NLM_F_ECHO);
         if (err > 0)
                 return 0;
  
@@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
               u32 pid, int event)
  {
         int i, ret;
-       struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+       struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
         struct tc_action *head = NULL, *act, *act_prev = NULL;
  
         ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
         if (ret < 0)
                 return ret;
  
-       if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+       if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
                 if (tb[1] != NULL)
                         return tca_action_flush(net, tb[1], n, pid);
                 else
@@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
                 /* now do the delete */
                 tcf_action_destroy(head, 0);
                 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
-                                    n->nlmsg_flags&NLM_F_ECHO);
+                                    n->nlmsg_flags & NLM_F_ECHO);
                 if (ret > 0)
                         return 0;
                 return ret;
@@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
         NETLINK_CB(skb).dst_group = RTNLGRP_TC;
  
-       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
         if (err > 0)
                 err = 0;
         return err;
@@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
  
         /* dump then free all the actions after update; inserted policy
          * stays intact
-        * */
+        */
         ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
         for (a = act; a; a = act) {
                 act = a->next;
@@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                 return -EINVAL;
         }
  
-       /* n->nlmsg_flags&NLM_F_CREATE
-        * */
+       /* n->nlmsg_flags & NLM_F_CREATE */
         switch (n->nlmsg_type) {
         case RTM_NEWACTION:
                 /* we are going to assume all other flags
@@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                  * but since we want avoid ambiguity (eg when flags
                  * is zero) then just set this
                  */
-               if (n->nlmsg_flags&NLM_F_REPLACE)
+               if (n->nlmsg_flags & NLM_F_REPLACE)
                         ovr = 1;
  replay:
                 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1029,7 @@ replay:
  static struct nlattr *
  find_dump_kind(const struct nlmsghdr *n)
  {
-       struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+       struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
         struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
         struct nlattr *nla[TCAA_MAX + 1];
         struct nlattr *kind;
@@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
         }
  
         a_o = tc_lookup_action(kind);
-       if (a_o == NULL) {
+       if (a_o == NULL)
                 return 0;
-       }
  
         memset(&a, 0, sizeof(struct tc_action));
         a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c

index 83ddfc0..6cdf9ab 100644 (file)
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
         if (nla == NULL)
                 return -EINVAL;
  
-       err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy);
+       err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
         if (err < 0)
                 return err;
  
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c

index c2ed90a..2b4ab4b 100644 (file)
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
  }
  
  typedef int (*g_rand)(struct tcf_gact *gact);
-static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
  #endif /* CONFIG_GACT_PROB */
  
  static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
                 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
                                      bind, &gact_idx_gen, &gact_hash_info);
                 if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                 ret = ACT_P_CREATED;
         } else {
                 if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
  static int __init gact_init_module(void)
  {
  #ifdef CONFIG_GACT_PROB
-       printk(KERN_INFO "GACT probability on\n");
+       pr_info("GACT probability on\n");
  #else
-       printk(KERN_INFO "GACT probability NOT on\n");
+       pr_info("GACT probability NOT on\n");
  #endif
         return tcf_register_action(&act_gact_ops);
  }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c

index c2a7c20..9fc211a 100644 (file)
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
                 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
                                      &ipt_idx_gen, &ipt_hash_info);
                 if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                 ret = ACT_P_CREATED;
         } else {
                 if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
         if (unlikely(!t))
                 goto err2;
  
-       if ((err = ipt_init_target(t, tname, hook)) < 0)
+       err = ipt_init_target(t, tname, hook);
+       if (err < 0)
                 goto err3;
  
         spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
         bstats_update(&ipt->tcf_bstats, skb);
  
         /* yes, we have to worry about both in and out dev
-        worry later - danger - this API seems to have changed
-        from earlier kernels */
+        * worry later - danger - this API seems to have changed
+        * from earlier kernels
+        */
         par.in       = skb->dev;
         par.out      = NULL;
         par.hooknum  = ipt->tcfi_hook;
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
         struct tc_cnt c;
  
         /* for simple targets kernel size == user size
-       ** user name = target name
-       ** for foolproof you need to not assume this
-       */
+        * user name = target name
+        * for foolproof you need to not assume this
+        */
  
         t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
         if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c

index d765067..961386e 100644 (file)
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
         .lock   =       &mirred_lock,
  };
  
-static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static int tcf_mirred_release(struct tcf_mirred *m, int bind)
  {
         if (m) {
                 if (bind)
                         m->tcf_bindcnt--;
                 m->tcf_refcnt--;
-               if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+               if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
                         list_del(&m->tcfm_list);
                         if (m->tcfm_dev)
                                 dev_put(m->tcfm_dev);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c

index 178a4bd..762b027 100644 (file)
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
                 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
                                      &nat_idx_gen, &nat_hash_info);
                 if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                 p = to_tcf_nat(pc);
                 ret = ACT_P_CREATED;
         } else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c

index 445bef7..50c7c06 100644 (file)
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
                 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
                                      &pedit_idx_gen, &pedit_hash_info);
                 if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                 p = to_pedit(pc);
                 keys = kmalloc(ksize, GFP_KERNEL);
                 if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
         int i, munged = 0;
         unsigned int off;
  
-       if (skb_cloned(skb)) {
-               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-                       return p->tcf_action;
-               }
-       }
+       if (skb_cloned(skb) &&
+           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+               return p->tcf_action;
  
         off = skb_network_offset(skb);
  
diff --git a/net/sched/act_police.c b/net/sched/act_police.c

index e2f08b1..8a16307 100644 (file)
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
  #include <net/act_api.h>
  #include <net/netlink.h>
  
-#define L2T(p,L)   qdisc_l2t((p)->tcfp_R_tab, L)
-#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
+#define L2T(p, L)   qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
  
  #define POL_TAB_MASK     15
  static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
  };
  
  /* old policer structure from before tc actions */
-struct tc_police_compat
-{
+struct tc_police_compat {
         u32                     index;
         int                     action;
         u32                     limit;
@@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
  static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
                                  struct tc_action *a, int ovr, int bind)
  {
-       unsigned h;
+       unsigned int h;
         int ret = 0, err;
         struct nlattr *tb[TCA_POLICE_MAX + 1];
         struct tc_police *parm;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c

index 7287cff..a34a22d 100644 (file)
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
         /* print policy string followed by _ then packet count
          * Example if this was the 3rd packet and the string was "hello"
          * then it would look like "hello_3" (without quotes)
-        **/
+        */
         pr_info("simple: %s_%d\n",
                (char *)d->tcfd_defdata, d->tcf_bstats.packets);
         spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
                 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
                                      &simp_idx_gen, &simp_hash_info);
                 if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
  
                 d = to_defact(pc);
                 ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
         return ret;
  }
  
-static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+static int tcf_simp_cleanup(struct tc_action *a, int bind)
  {
         struct tcf_defact *d = a->priv;
  
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
         return 0;
  }
  
-static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
-                               int bind, int ref)
+static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+                        int bind, int ref)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c

index 836f5fe..5f6f0c7 100644 (file)
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
                 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
                                      &skbedit_idx_gen, &skbedit_hash_info);
                 if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
  
                 d = to_skbedit(pc);
                 ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
         return ret;
  }
  
-static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
  {
         struct tcf_skbedit *d = a->priv;
  
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
         return 0;
  }
  
-static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
-                               int bind, int ref)
+static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+                           int bind, int ref)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c

index 5fd0c28..bb2c523 100644 (file)
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
         int rc = -ENOENT;
  
         write_lock(&cls_mod_lock);
-       for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+       for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
                 if (t == ops)
                         break;
  
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
         u32 first = TC_H_MAKE(0xC0000000U, 0U);
  
         if (tp)
-               first = tp->prio-1;
+               first = tp->prio - 1;
  
         return first;
  }
@@ -149,7 +149,8 @@ replay:
  
         if (prio == 0) {
                 /* If no priority is given, user wants we allocated it. */
-               if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+               if (n->nlmsg_type != RTM_NEWTFILTER ||
+                   !(n->nlmsg_flags & NLM_F_CREATE))
                         return -ENOENT;
                 prio = TC_H_MAKE(0x80000000U, 0U);
         }
@@ -176,7 +177,8 @@ replay:
         }
  
         /* Is it classful? */
-       if ((cops = q->ops->cl_ops) == NULL)
+       cops = q->ops->cl_ops;
+       if (!cops)
                 return -EINVAL;
  
         if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
                 goto errout;
  
         /* Check the chain for existence of proto-tcf with this priority */
-       for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+       for (back = chain; (tp = *back) != NULL; back = &tp->next) {
                 if (tp->prio >= prio) {
                         if (tp->prio == prio) {
-                               if (!nprio || (tp->protocol != protocol && protocol))
+                               if (!nprio ||
+                                   (tp->protocol != protocol && protocol))
                                         goto errout;
                         } else
                                 tp = NULL;
@@ -216,7 +219,8 @@ replay:
                         goto errout;
  
                 err = -ENOENT;
-               if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+               if (n->nlmsg_type != RTM_NEWTFILTER ||
+                   !(n->nlmsg_flags & NLM_F_CREATE))
                         goto errout;
  
  
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
  
         if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                 return skb->len;
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                 return skb->len;
  
         if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
         if (!q)
                 goto out;
-       if ((cops = q->ops->cl_ops) == NULL)
+       cops = q->ops->cl_ops;
+       if (!cops)
                 goto errout;
         if (cops->tcf_chain == NULL)
                 goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
  
         s_t = cb->args[0];
  
-       for (tp=*chain, t=0; tp; tp = tp->next, t++) {
-               if (t < s_t) continue;
+       for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+               if (t < s_t)
+                       continue;
                 if (TC_H_MAJ(tcm->tcm_info) &&
                     TC_H_MAJ(tcm->tcm_info) != tp->prio)
                         continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                 arg.skb = skb;
                 arg.cb = cb;
                 arg.w.stop = 0;
-               arg.w.skip = cb->args[1]-1;
+               arg.w.skip = cb->args[1] - 1;
                 arg.w.count = 0;
                 tp->ops->walk(tp, &arg.w);
-               cb->args[1] = arg.w.count+1;
+               cb->args[1] = arg.w.count + 1;
                 if (arg.w.stop)
                         break;
         }
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c

index f23d915..8be8872 100644 (file)
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
  #include <net/act_api.h>
  #include <net/pkt_cls.h>
  
-struct basic_head
-{
+struct basic_head {
         u32                     hgenerator;
         struct list_head        flist;
  };
  
-struct basic_filter
-{
+struct basic_filter {
         u32                     handle;
         struct tcf_exts         exts;
         struct tcf_ematch_tree  ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
         return 0;
  }
  
-static inline void basic_delete_filter(struct tcf_proto *tp,
-                                      struct basic_filter *f)
+static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
  {
         tcf_unbind_filter(tp, &f->res);
         tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
         [TCA_BASIC_EMATCHES]    = { .type = NLA_NESTED },
  };
  
-static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
-                                 unsigned long base, struct nlattr **tb,
-                                 struct nlattr *est)
+static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+                          unsigned long base, struct nlattr **tb,
+                          struct nlattr *est)
  {
         int err = -EINVAL;
         struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
                 } while (--i > 0 && basic_get(tp, head->hgenerator));
  
                 if (i <= 0) {
-                       printk(KERN_ERR "Insufficient number of handles\n");
+                       pr_err("Insufficient number of handles\n");
                         goto errout;
                 }
  
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c

index d49c40f..32a3351 100644 (file)
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
  {
         struct cgroup_cls_state *cs;
  
-       if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+       if (!cs)
                 return ERR_PTR(-ENOMEM);
  
         if (cgrp->parent)
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
         return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
  }
  
-struct cls_cgroup_head
-{
+struct cls_cgroup_head {
         u32                     handle;
         struct tcf_exts         exts;
         struct tcf_ematch_tree  ematches;
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
                              u32 handle, struct nlattr **tca,
                              unsigned long *arg)
  {
-       struct nlattr *tb[TCA_CGROUP_MAX+1];
+       struct nlattr *tb[TCA_CGROUP_MAX + 1];
         struct cls_cgroup_head *head = tp->root;
         struct tcf_ematch_tree t;
         struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c

index 5b271a1..8ec0139 100644 (file)
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb)
                 if (!pskb_network_may_pull(skb, sizeof(*iph)))
                         break;
                 iph = ip_hdr(skb);
-               if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+               if (iph->frag_off & htons(IP_MF | IP_OFFSET))
                         break;
                 poff = proto_ports_offset(iph->protocol);
                 if (poff >= 0 &&
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
                 if (!pskb_network_may_pull(skb, sizeof(*iph)))
                         break;
                 iph = ip_hdr(skb);
-               if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+               if (iph->frag_off & htons(IP_MF | IP_OFFSET))
                         break;
                 poff = proto_ports_offset(iph->protocol);
                 if (poff >= 0 &&
@@ -276,7 +276,7 @@ fallback:
  
  static u32 flow_get_rtclassid(const struct sk_buff *skb)
  {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
         if (skb_dst(skb))
                 return skb_dst(skb)->tclassid;
  #endif
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c

index 93b0a7b..26e7bc4 100644 (file)
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
  
  #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
  
-struct fw_head
-{
+struct fw_head {
         struct fw_filter *ht[HTSIZE];
         u32 mask;
  };
  
-struct fw_filter
-{
+struct fw_filter {
         struct fw_filter        *next;
         u32                     id;
         struct tcf_result       res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
         .police = TCA_FW_POLICE
  };
  
-static __inline__ int fw_hash(u32 handle)
+static inline int fw_hash(u32 handle)
  {
         if (HTSIZE == 4096)
                 return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
  static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
                           struct tcf_result *res)
  {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
         struct fw_filter *f;
         int r;
         u32 id = skb->mark;
  
         if (head != NULL) {
                 id &= head->mask;
-               for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+               for (f = head->ht[fw_hash(id)]; f; f = f->next) {
                         if (f->id == id) {
                                 *res = f->res;
  #ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
                 }
         } else {
                 /* old method */
-               if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+               if (id && (TC_H_MAJ(id) == 0 ||
+                          !(TC_H_MAJ(id ^ tp->q->handle)))) {
                         res->classid = id;
                         res->class = 0;
                         return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
  
  static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
  {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
         struct fw_filter *f;
  
         if (head == NULL)
                 return 0;
  
-       for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+       for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
                 if (f->id == handle)
                         return (unsigned long)f;
         }
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
         return 0;
  }
  
-static inline void
-fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
  {
         tcf_unbind_filter(tp, &f->res);
         tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
         if (head == NULL)
                 return;
  
-       for (h=0; h<HTSIZE; h++) {
-               while ((f=head->ht[h]) != NULL) {
+       for (h = 0; h < HTSIZE; h++) {
+               while ((f = head->ht[h]) != NULL) {
                         head->ht[h] = f->next;
                         fw_delete_filter(tp, f);
                 }
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
  
  static int fw_delete(struct tcf_proto *tp, unsigned long arg)
  {
-       struct fw_head *head = (struct fw_head*)tp->root;
-       struct fw_filter *f = (struct fw_filter*)arg;
+       struct fw_head *head = (struct fw_head *)tp->root;
+       struct fw_filter *f = (struct fw_filter *)arg;
         struct fw_filter **fp;
  
         if (head == NULL || f == NULL)
                 goto out;
  
-       for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+       for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
                 if (*fp == f) {
                         tcf_tree_lock(tp);
                         *fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
                      struct nlattr **tca,
                      unsigned long *arg)
  {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
         struct fw_filter *f = (struct fw_filter *) *arg;
         struct nlattr *opt = tca[TCA_OPTIONS];
         struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
  
  static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
  {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
         int h;
  
         if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
                    struct sk_buff *skb, struct tcmsg *t)
  {
         struct fw_head *head = (struct fw_head *)tp->root;
-       struct fw_filter *f = (struct fw_filter*)fh;
+       struct fw_filter *f = (struct fw_filter *)fh;
         unsigned char *b = skb_tail_pointer(skb);
         struct nlattr *nest;
  
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c

index 694dcd8..d580cdf 100644 (file)
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
  #include <net/pkt_cls.h>
  
  /*
-   1. For now we assume that route tags < 256.
-      It allows to use direct table lookups, instead of hash tables.
-   2. For now we assume that "from TAG" and "fromdev DEV" statements
-      are mutually  exclusive.
-   3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
+ * 1. For now we assume that route tags < 256.
+ *    It allows to use direct table lookups, instead of hash tables.
+ * 2. For now we assume that "from TAG" and "fromdev DEV" statements
+ *    are mutually  exclusive.
+ * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
   */
  
-struct route4_fastmap
-{
+struct route4_fastmap {
         struct route4_filter    *filter;
         u32                     id;
         int                     iif;
  };
  
-struct route4_head
-{
+struct route4_head {
         struct route4_fastmap   fastmap[16];
-       struct route4_bucket    *table[256+1];
+       struct route4_bucket    *table[256 + 1];
  };
  
-struct route4_bucket
-{
+struct route4_bucket {
         /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
-       struct route4_filter    *ht[16+16+1];
+       struct route4_filter    *ht[16 + 16 + 1];
  };
  
-struct route4_filter
-{
+struct route4_filter {
         struct route4_filter    *next;
         u32                     id;
         int                     iif;
@@ -61,20 +57,20 @@ struct route4_filter
         struct route4_bucket    *bkt;
  };
  
-#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
  
  static const struct tcf_ext_map route_ext_map = {
         .police = TCA_ROUTE4_POLICE,
         .action = TCA_ROUTE4_ACT
  };
  
-static __inline__ int route4_fastmap_hash(u32 id, int iif)
+static inline int route4_fastmap_hash(u32 id, int iif)
  {
-       return id&0xF;
+       return id & 0xF;
  }
  
-static inline
-void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+static void
+route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
  {
         spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
  
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
         spin_unlock_bh(root_lock);
  }
  
-static inline void
+static void
  route4_set_fastmap(struct route4_head *head, u32 id, int iif,
                    struct route4_filter *f)
  {
         int h = route4_fastmap_hash(id, iif);
+
         head->fastmap[h].id = id;
         head->fastmap[h].iif = iif;
         head->fastmap[h].filter = f;
  }
  
-static __inline__ int route4_hash_to(u32 id)
+static inline int route4_hash_to(u32 id)
  {
-       return id&0xFF;
+       return id & 0xFF;
  }
  
-static __inline__ int route4_hash_from(u32 id)
+static inline int route4_hash_from(u32 id)
  {
-       return (id>>16)&0xF;
+       return (id >> 16) & 0xF;
  }
  
-static __inline__ int route4_hash_iif(int iif)
+static inline int route4_hash_iif(int iif)
  {
-       return 16 + ((iif>>16)&0xF);
+       return 16 + ((iif >> 16) & 0xF);
  }
  
-static __inline__ int route4_hash_wild(void)
+static inline int route4_hash_wild(void)
  {
         return 32;
  }
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
  static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
                            struct tcf_result *res)
  {
-       struct route4_head *head = (struct route4_head*)tp->root;
+       struct route4_head *head = (struct route4_head *)tp->root;
         struct dst_entry *dst;
         struct route4_bucket *b;
         struct route4_filter *f;
         u32 id, h;
         int iif, dont_cache = 0;
  
-       if ((dst = skb_dst(skb)) == NULL)
+       dst = skb_dst(skb);
+       if (!dst)
                 goto failure;
  
         id = dst->tclassid;
         if (head == NULL)
                 goto old_method;
  
-       iif = ((struct rtable*)dst)->fl.iif;
+       iif = ((struct rtable *)dst)->fl.iif;
  
         h = route4_fastmap_hash(id, iif);
         if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
         h = route4_hash_to(id);
  
  restart:
-       if ((b = head->table[h]) != NULL) {
+       b = head->table[h];
+       if (b) {
                 for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
                         if (f->id == id)
                                 ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
  
  static inline u32 to_hash(u32 id)
  {
-       u32 h = id&0xFF;
-       if (id&0x8000)
+       u32 h = id & 0xFF;
+
+       if (id & 0x8000)
                 h += 256;
         return h;
  }
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
         if (!(id & 0x8000)) {
                 if (id > 255)
                         return 256;
-               return id&0xF;
+               return id & 0xF;
         }
-       return 16 + (id&0xF);
+       return 16 + (id & 0xF);
  }
  
  static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
  {
-       struct route4_head *head = (struct route4_head*)tp->root;
+       struct route4_head *head = (struct route4_head *)tp->root;
         struct route4_bucket *b;
         struct route4_filter *f;
-       unsigned h1, h2;
+       unsigned int h1, h2;
  
         if (!head)
                 return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
         if (h1 > 256)
                 return 0;
  
-       h2 = from_hash(handle>>16);
+       h2 = from_hash(handle >> 16);
         if (h2 > 32)
                 return 0;
  
-       if ((b = head->table[h1]) != NULL) {
+       b = head->table[h1];
+       if (b) {
                 for (f = b->ht[h2]; f; f = f->next)
                         if (f->handle == handle)
                                 return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
         return 0;
  }
  
-static inline void
+static void
  route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
  {
         tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
         if (head == NULL)
                 return;
  
-       for (h1=0; h1<=256; h1++) {
+       for (h1 = 0; h1 <= 256; h1++) {
                 struct route4_bucket *b;
  
-               if ((b = head->table[h1]) != NULL) {
-                       for (h2=0; h2<=32; h2++) {
+               b = head->table[h1];
+               if (b) {
+                       for (h2 = 0; h2 <= 32; h2++) {
                                 struct route4_filter *f;
  
                                 while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
  
  static int route4_delete(struct tcf_proto *tp, unsigned long arg)
  {
-       struct route4_head *head = (struct route4_head*)tp->root;
-       struct route4_filter **fp, *f = (struct route4_filter*)arg;
-       unsigned h = 0;
+       struct route4_head *head = (struct route4_head *)tp->root;
+       struct route4_filter **fp, *f = (struct route4_filter *)arg;
+       unsigned int h = 0;
         struct route4_bucket *b;
         int i;
  
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
         h = f->handle;
         b = f->bkt;
  
-       for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+       for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
                 if (*fp == f) {
                         tcf_tree_lock(tp);
                         *fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
  
                         /* Strip tree */
  
-                       for (i=0; i<=32; i++)
+                       for (i = 0; i <= 32; i++)
                                 if (b->ht[i])
                                         return 0;
  
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
         }
  
         h1 = to_hash(nhandle);
-       if ((b = head->table[h1]) == NULL) {
+       b = head->table[h1];
+       if (!b) {
                 err = -ENOBUFS;
                 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
                 if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
                 tcf_tree_unlock(tp);
         } else {
                 unsigned int h2 = from_hash(nhandle >> 16);
+
                 err = -EEXIST;
                 for (fp = b->ht[h2]; fp; fp = fp->next)
                         if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
         if (err < 0)
                 return err;
  
-       if ((f = (struct route4_filter*)*arg) != NULL) {
+       f = (struct route4_filter *)*arg;
+       if (f) {
                 if (f->handle != handle && handle)
                         return -EINVAL;
  
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
  
  reinsert:
         h = from_hash(f->handle >> 16);
-       for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+       for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
                 if (f->handle < f1->handle)
                         break;
  
@@ -492,7 +497,8 @@ reinsert:
         if (old_handle && f->handle != old_handle) {
                 th = to_hash(old_handle);
                 h = from_hash(old_handle >> 16);
-               if ((b = head->table[th]) != NULL) {
+               b = head->table[th];
+               if (b) {
                         for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
                                 if (*fp == f) {
                                         *fp = f->next;
@@ -515,7 +521,7 @@ errout:
  static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
  {
         struct route4_head *head = tp->root;
-       unsigned h, h1;
+       unsigned int h, h1;
  
         if (head == NULL)
                 arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
  static int route4_dump(struct tcf_proto *tp, unsigned long fh,
                        struct sk_buff *skb, struct tcmsg *t)
  {
-       struct route4_filter *f = (struct route4_filter*)fh;
+       struct route4_filter *f = (struct route4_filter *)fh;
         unsigned char *b = skb_tail_pointer(skb);
         struct nlattr *nest;
         u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
         if (nest == NULL)
                 goto nla_put_failure;
  
-       if (!(f->handle&0x8000)) {
-               id = f->id&0xFF;
+       if (!(f->handle & 0x8000)) {
+               id = f->id & 0xFF;
                 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
         }
-       if (f->handle&0x80000000) {
-               if ((f->handle>>16) != 0xFFFF)
+       if (f->handle & 0x80000000) {
+               if ((f->handle >> 16) != 0xFFFF)
                         NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
         } else {
-               id = f->id>>16;
+               id = f->id >> 16;
                 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
         }
         if (f->res.classid)
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h

index 425a179..402c44b 100644 (file)
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
     powerful classification engine.  */
  
  
-struct rsvp_head
-{
+struct rsvp_head {
         u32                     tmap[256/32];
         u32                     hgenerator;
         u8                      tgenerator;
         struct rsvp_session     *ht[256];
  };
  
-struct rsvp_session
-{
+struct rsvp_session {
         struct rsvp_session     *next;
         __be32                  dst[RSVP_DST_LEN];
         struct tc_rsvp_gpi      dpi;
         u8                      protocol;
         u8                      tunnelid;
         /* 16 (src,sport) hash slots, and one wildcard source slot */
-       struct rsvp_filter      *ht[16+1];
+       struct rsvp_filter      *ht[16 + 1];
  };
  
  
-struct rsvp_filter
-{
+struct rsvp_filter {
         struct rsvp_filter      *next;
         __be32                  src[RSVP_DST_LEN];
         struct tc_rsvp_gpi      spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
         struct rsvp_session     *sess;
  };
  
-static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
+static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
  {
-       unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
+       unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
+
         h ^= h>>16;
         h ^= h>>8;
         return (h ^ protocol ^ tunnelid) & 0xFF;
  }
  
-static __inline__ unsigned hash_src(__be32 *src)
+static inline unsigned int hash_src(__be32 *src)
  {
-       unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
+       unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
+
         h ^= h>>16;
         h ^= h>>8;
         h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
  static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
                          struct tcf_result *res)
  {
-       struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+       struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
         struct rsvp_session *s;
         struct rsvp_filter *f;
-       unsigned h1, h2;
+       unsigned int h1, h2;
         __be32 *dst, *src;
         u8 protocol;
         u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
         src = &nhptr->saddr.s6_addr32[0];
         dst = &nhptr->daddr.s6_addr32[0];
         protocol = nhptr->nexthdr;
-       xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+       xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
  #else
         src = &nhptr->saddr;
         dst = &nhptr->daddr;
         protocol = nhptr->protocol;
-       xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
-       if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
+       xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
+       if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
                 return -1;
  #endif
  
@@ -176,10 +175,10 @@ restart:
         h2 = hash_src(src);
  
         for (s = sht[h1]; s; s = s->next) {
-               if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+               if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
                     protocol == s->protocol &&
                     !(s->dpi.mask &
-                     (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
+                     (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
  #if RSVP_DST_LEN == 4
                     dst[0] == s->dst[0] &&
                     dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
                     tunnelid == s->tunnelid) {
  
                         for (f = s->ht[h2]; f; f = f->next) {
-                               if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
-                                   !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+                               if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
+                                   !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
  #if RSVP_DST_LEN == 4
                                     &&
                                     src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
                                                 return 0;
  
                                         tunnelid = f->res.classid;
-                                       nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+                                       nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
                                         goto restart;
                                 }
                         }
@@ -224,11 +223,11 @@ matched:
  
  static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
  {
-       struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+       struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
         struct rsvp_session *s;
         struct rsvp_filter *f;
-       unsigned h1 = handle&0xFF;
-       unsigned h2 = (handle>>8)&0xFF;
+       unsigned int h1 = handle & 0xFF;
+       unsigned int h2 = (handle >> 8) & 0xFF;
  
         if (h2 > 16)
                 return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
         return -ENOBUFS;
  }
  
-static inline void
+static void
  rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
  {
         tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
  
         sht = data->ht;
  
-       for (h1=0; h1<256; h1++) {
+       for (h1 = 0; h1 < 256; h1++) {
                 struct rsvp_session *s;
  
                 while ((s = sht[h1]) != NULL) {
                         sht[h1] = s->next;
  
-                       for (h2=0; h2<=16; h2++) {
+                       for (h2 = 0; h2 <= 16; h2++) {
                                 struct rsvp_filter *f;
  
                                 while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
  
  static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
  {
-       struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
-       unsigned h = f->handle;
+       struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+       unsigned int h = f->handle;
         struct rsvp_session **sp;
         struct rsvp_session *s = f->sess;
         int i;
  
-       for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+       for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
                 if (*fp == f) {
                         tcf_tree_lock(tp);
                         *fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
  
                         /* Strip tree */
  
-                       for (i=0; i<=16; i++)
+                       for (i = 0; i <= 16; i++)
                                 if (s->ht[i])
                                         return 0;
  
                         /* OK, session has no flows */
-                       for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+                       for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
                              *sp; sp = &(*sp)->next) {
                                 if (*sp == s) {
                                         tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
         return 0;
  }
  
-static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
  {
         struct rsvp_head *data = tp->root;
         int i = 0xFFFF;
  
         while (i-- > 0) {
                 u32 h;
+
                 if ((data->hgenerator += 0x10000) == 0)
                         data->hgenerator = 0x10000;
                 h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
  
  static int tunnel_bts(struct rsvp_head *data)
  {
-       int n = data->tgenerator>>5;
-       u32 b = 1<<(data->tgenerator&0x1F);
+       int n = data->tgenerator >> 5;
+       u32 b = 1 << (data->tgenerator & 0x1F);
  
-       if (data->tmap[n]&b)
+       if (data->tmap[n] & b)
                 return 0;
         data->tmap[n] |= b;
         return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
  
         memset(tmap, 0, sizeof(tmap));
  
-       for (h1=0; h1<256; h1++) {
+       for (h1 = 0; h1 < 256; h1++) {
                 struct rsvp_session *s;
                 for (s = sht[h1]; s; s = s->next) {
-                       for (h2=0; h2<=16; h2++) {
+                       for (h2 = 0; h2 <= 16; h2++) {
                                 struct rsvp_filter *f;
  
                                 for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
  {
         int i, k;
  
-       for (k=0; k<2; k++) {
-               for (i=255; i>0; i--) {
+       for (k = 0; k < 2; k++) {
+               for (i = 255; i > 0; i--) {
                         if (++data->tgenerator == 0)
                                 data->tgenerator = 1;
                         if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
         struct nlattr *opt = tca[TCA_OPTIONS-1];
         struct nlattr *tb[TCA_RSVP_MAX + 1];
         struct tcf_exts e;
-       unsigned h1, h2;
+       unsigned int h1, h2;
         __be32 *dst;
         int err;
  
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
         if (err < 0)
                 return err;
  
-       if ((f = (struct rsvp_filter*)*arg) != NULL) {
+       f = (struct rsvp_filter *)*arg;
+       if (f) {
                 /* Node exists: adjust only classid */
  
                 if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
                         goto errout;
         }
  
-       for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+       for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
                     pinfo && pinfo->protocol == s->protocol &&
                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
                         tcf_exts_change(tp, &f->exts, &e);
  
                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
-                               if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+                               if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
                                         break;
                         f->next = *fp;
                         wmb();
@@ -567,7 +568,7 @@ errout2:
  static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
  {
         struct rsvp_head *head = tp->root;
-       unsigned h, h1;
+       unsigned int h, h1;
  
         if (arg->stop)
                 return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
  static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
                      struct sk_buff *skb, struct tcmsg *t)
  {
-       struct rsvp_filter *f = (struct rsvp_filter*)fh;
+       struct rsvp_filter *f = (struct rsvp_filter *)fh;
         struct rsvp_session *s;
         unsigned char *b = skb_tail_pointer(skb);
         struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
         NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
         if (f->res.classid)
                 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
-       if (((f->handle>>8)&0xFF) != 16)
+       if (((f->handle >> 8) & 0xFF) != 16)
                 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
  
         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c

index 20ef330..36667fa 100644 (file)
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
                  * of the hashing index is below the threshold.
                  */
                 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
-                       cp.hash = (cp.mask >> cp.shift)+1;
+                       cp.hash = (cp.mask >> cp.shift) + 1;
                 else
                         cp.hash = DEFAULT_HASH_SIZE;
         }
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c

index b0c2a82..966920c 100644 (file)
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
  #include <net/act_api.h>
  #include <net/pkt_cls.h>
  
-struct tc_u_knode
-{
+struct tc_u_knode {
         struct tc_u_knode       *next;
         u32                     handle;
         struct tc_u_hnode       *ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
         struct tc_u32_sel       sel;
  };
  
-struct tc_u_hnode
-{
+struct tc_u_hnode {
         struct tc_u_hnode       *next;
         u32                     handle;
         u32                     prio;
         struct tc_u_common      *tp_c;
         int                     refcnt;
-       unsigned                divisor;
+       unsigned int            divisor;
         struct tc_u_knode       *ht[1];
  };
  
-struct tc_u_common
-{
+struct tc_u_common {
         struct tc_u_hnode       *hlist;
         struct Qdisc            *q;
         int                     refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
         .police = TCA_U32_POLICE
  };
  
-static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
+static inline unsigned int u32_hash_fold(__be32 key,
+                                        const struct tc_u32_sel *sel,
+                                        u8 fshift)
  {
-       unsigned h = ntohl(key & sel->hmask)>>fshift;
+       unsigned int h = ntohl(key & sel->hmask) >> fshift;
  
         return h;
  }
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
                 unsigned int      off;
         } stack[TC_U32_MAXDEPTH];
  
-       struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+       struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
         unsigned int off = skb_network_offset(skb);
         struct tc_u_knode *n;
         int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
                 struct tc_u32_key *key = n->sel.keys;
  
  #ifdef CONFIG_CLS_U32_PERF
-               n->pf->rcnt +=1;
+               n->pf->rcnt += 1;
                 j = 0;
  #endif
  
@@ -133,7 +132,7 @@ next_knode:
                 }
  #endif
  
-               for (i = n->sel.nkeys; i>0; i--, key++) {
+               for (i = n->sel.nkeys; i > 0; i--, key++) {
                         int toff = off + key->off + (off2 & key->offmask);
                         __be32 *data, _data;
  
@@ -148,13 +147,13 @@ next_knode:
                                 goto next_knode;
                         }
  #ifdef CONFIG_CLS_U32_PERF
-                       n->pf->kcnts[j] +=1;
+                       n->pf->kcnts[j] += 1;
                         j++;
  #endif
                 }
                 if (n->ht_down == NULL) {
  check_terminal:
-                       if (n->sel.flags&TC_U32_TERMINAL) {
+                       if (n->sel.flags & TC_U32_TERMINAL) {
  
                                 *res = n->res;
  #ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
                                 }
  #endif
  #ifdef CONFIG_CLS_U32_PERF
-                               n->pf->rhit +=1;
+                               n->pf->rhit += 1;
  #endif
                                 r = tcf_exts_exec(skb, &n->exts, res);
                                 if (r < 0) {
@@ -197,10 +196,10 @@ check_terminal:
                         sel = ht->divisor & u32_hash_fold(*data, &n->sel,
                                                           n->fshift);
                 }
-               if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+               if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
                         goto next_ht;
  
-               if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+               if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
                         off2 = n->sel.off + 3;
                         if (n->sel.flags & TC_U32_VAROFFSET) {
                                 __be16 *data, _data;
@@ -215,7 +214,7 @@ check_terminal:
                         }
                         off2 &= ~3;
                 }
-               if (n->sel.flags&TC_U32_EAT) {
+               if (n->sel.flags & TC_U32_EAT) {
                         off += off2;
                         off2 = 0;
                 }
@@ -236,11 +235,11 @@ out:
  
  deadloop:
         if (net_ratelimit())
-               printk(KERN_WARNING "cls_u32: dead loop\n");
+               pr_warning("cls_u32: dead loop\n");
         return -1;
  }
  
-static __inline__ struct tc_u_hnode *
+static struct tc_u_hnode *
  u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
  {
         struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
         return ht;
  }
  
-static __inline__ struct tc_u_knode *
+static struct tc_u_knode *
  u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
  {
-       unsigned sel;
+       unsigned int sel;
         struct tc_u_knode *n = NULL;
  
         sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
         do {
                 if (++tp_c->hgenerator == 0x7FF)
                         tp_c->hgenerator = 1;
-       } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+       } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
  
         return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
  }
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
  static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
  {
         struct tc_u_knode *n;
-       unsigned h;
+       unsigned int h;
  
-       for (h=0; h<=ht->divisor; h++) {
+       for (h = 0; h <= ht->divisor; h++) {
                 while ((n = ht->ht[h]) != NULL) {
                         ht->ht[h] = n->next;
  
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
  
  static int u32_delete(struct tcf_proto *tp, unsigned long arg)
  {
-       struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+       struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
  
         if (ht == NULL)
                 return 0;
  
         if (TC_U32_KEY(ht->handle))
-               return u32_delete_key(tp, (struct tc_u_knode*)ht);
+               return u32_delete_key(tp, (struct tc_u_knode *)ht);
  
         if (tp->root == ht)
                 return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
  static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
  {
         struct tc_u_knode *n;
-       unsigned i = 0x7FF;
+       unsigned int i = 0x7FF;
  
-       for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+       for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
                 if (i < TC_U32_NODE(n->handle))
                         i = TC_U32_NODE(n->handle);
         i++;
  
-       return handle|(i>0xFFF ? 0xFFF : i);
+       return handle | (i > 0xFFF ? 0xFFF : i);
  }
  
  static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
         if (err < 0)
                 return err;
  
-       if ((n = (struct tc_u_knode*)*arg) != NULL) {
+       n = (struct tc_u_knode *)*arg;
+       if (n) {
                 if (TC_U32_KEY(n->handle) == 0)
                         return -EINVAL;
  
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
         }
  
         if (tb[TCA_U32_DIVISOR]) {
-               unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
+               unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
  
                 if (--divisor > 0x100)
                         return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
                         if (handle == 0)
                                 return -ENOMEM;
                 }
-               ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+               ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
                 if (ht == NULL)
                         return -ENOBUFS;
                 ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
         struct tc_u_common *tp_c = tp->data;
         struct tc_u_hnode *ht;
         struct tc_u_knode *n;
-       unsigned h;
+       unsigned int h;
  
         if (arg->stop)
                 return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
  static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                      struct sk_buff *skb, struct tcmsg *t)
  {
-       struct tc_u_knode *n = (struct tc_u_knode*)fh;
+       struct tc_u_knode *n = (struct tc_u_knode *)fh;
         struct nlattr *nest;
  
         if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                 goto nla_put_failure;
  
         if (TC_U32_KEY(n->handle) == 0) {
-               struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
-               u32 divisor = ht->divisor+1;
+               struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+               u32 divisor = ht->divisor + 1;
+
                 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
         } else {
                 NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                         goto nla_put_failure;
  
  #ifdef CONFIG_NET_CLS_IND
-               if(strlen(n->indev))
+               if (strlen(n->indev))
                         NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
  #endif
  #ifdef CONFIG_CLS_U32_PERF
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c

index bc45039..1c8360a 100644 (file)
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
                 return 0;
  
         switch (cmp->align) {
-               case TCF_EM_ALIGN_U8:
-                       val = *ptr;
-                       break;
+       case TCF_EM_ALIGN_U8:
+               val = *ptr;
+               break;
  
-               case TCF_EM_ALIGN_U16:
-                       val = get_unaligned_be16(ptr);
+       case TCF_EM_ALIGN_U16:
+               val = get_unaligned_be16(ptr);
  
-                       if (cmp_needs_transformation(cmp))
-                               val = be16_to_cpu(val);
-                       break;
+               if (cmp_needs_transformation(cmp))
+                       val = be16_to_cpu(val);
+               break;
  
-               case TCF_EM_ALIGN_U32:
-                       /* Worth checking boundries? The branching seems
-                        * to get worse. Visit again. */
-                       val = get_unaligned_be32(ptr);
+       case TCF_EM_ALIGN_U32:
+               /* Worth checking boundries? The branching seems
+                * to get worse. Visit again.
+                */
+               val = get_unaligned_be32(ptr);
  
-                       if (cmp_needs_transformation(cmp))
-                               val = be32_to_cpu(val);
-                       break;
+               if (cmp_needs_transformation(cmp))
+                       val = be32_to_cpu(val);
+               break;
  
-               default:
-                       return 0;
+       default:
+               return 0;
         }
  
         if (cmp->mask)
                 val &= cmp->mask;
  
         switch (cmp->opnd) {
-               case TCF_EM_OPND_EQ:
-                       return val == cmp->val;
-               case TCF_EM_OPND_LT:
-                       return val < cmp->val;
-               case TCF_EM_OPND_GT:
-                       return val > cmp->val;
+       case TCF_EM_OPND_EQ:
+               return val == cmp->val;
+       case TCF_EM_OPND_LT:
+               return val < cmp->val;
+       case TCF_EM_OPND_GT:
+               return val > cmp->val;
         }
  
         return 0;
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c

index 34da5e2..a889d09 100644 (file)
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -73,21 +73,18 @@
  #include <net/pkt_cls.h>
  #include <net/sock.h>
  
-struct meta_obj
-{
+struct meta_obj {
         unsigned long           value;
         unsigned int            len;
  };
  
-struct meta_value
-{
+struct meta_value {
         struct tcf_meta_val     hdr;
         unsigned long           val;
         unsigned int            len;
  };
  
-struct meta_match
-{
+struct meta_match {
         struct meta_value       lvalue;
         struct meta_value       rvalue;
  };
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
         if (unlikely(skb_dst(skb) == NULL))
                 *err = -1;
         else
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                 dst->value = skb_dst(skb)->tclassid;
  #else
                 dst->value = 0;
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
   * Meta value collectors assignment table
   **************************************************************************/
  
-struct meta_ops
-{
+struct meta_ops {
         void            (*get)(struct sk_buff *, struct tcf_pkt_info *,
                                struct meta_value *, struct meta_obj *, int *);
  };
@@ -494,7 +490,7 @@ struct meta_ops
  
  /* Meta value operations table listing all meta value collectors and
   * assigns them to a type and meta id. */
-static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
         [TCF_META_TYPE_VAR] = {
                 [META_ID(DEV)]                  = META_FUNC(var_dev),
                 [META_ID(SK_BOUND_IF)]          = META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
         }
  };
  
-static inline struct meta_ops * meta_ops(struct meta_value *val)
+static inline struct meta_ops *meta_ops(struct meta_value *val)
  {
         return &__meta_ops[meta_type(val)][meta_id(val)];
  }
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
  {
         if (v->len == sizeof(unsigned long))
                 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
-       else if (v->len == sizeof(u32)) {
+       else if (v->len == sizeof(u32))
                 NLA_PUT_U32(skb, tlv, v->val);
-       }
  
         return 0;
  
@@ -663,8 +658,7 @@ nla_put_failure:
   * Type specific operations table
   **************************************************************************/
  
-struct meta_type_ops
-{
+struct meta_type_ops {
         void    (*destroy)(struct meta_value *);
         int     (*compare)(struct meta_obj *, struct meta_obj *);
         int     (*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@ struct meta_type_ops
         int     (*dump)(struct sk_buff *, struct meta_value *, int);
  };
  
-static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
         [TCF_META_TYPE_VAR] = {
                 .destroy = meta_var_destroy,
                 .compare = meta_var_compare,
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
         }
  };
  
-static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
  {
         return &__meta_type_ops[meta_type(v)];
  }
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
                 return err;
  
         if (meta_type_ops(v)->apply_extras)
-           meta_type_ops(v)->apply_extras(v, dst);
+               meta_type_ops(v)->apply_extras(v, dst);
  
         return 0;
  }
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
         r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
  
         switch (meta->lvalue.hdr.op) {
-               case TCF_EM_OPND_EQ:
-                       return !r;
-               case TCF_EM_OPND_LT:
-                       return r < 0;
-               case TCF_EM_OPND_GT:
-                       return r > 0;
+       case TCF_EM_OPND_EQ:
+               return !r;
+       case TCF_EM_OPND_LT:
+               return r < 0;
+       case TCF_EM_OPND_GT:
+               return r > 0;
         }
  
         return 0;
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
  
  static inline int meta_is_supported(struct meta_value *val)
  {
-       return (!meta_id(val) || meta_ops(val)->get);
+       return !meta_id(val) || meta_ops(val)->get;
  }
  
  static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c

index 1a4176a..a3bed07 100644 (file)
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
  #include <linux/tc_ematch/tc_em_nbyte.h>
  #include <net/pkt_cls.h>
  
-struct nbyte_data
-{
+struct nbyte_data {
         struct tcf_em_nbyte     hdr;
         char                    pattern[0];
  };
diff --git a/net/sched/em_text.c b/net/sched/em_text.c

index ea8f566..15d353d 100644 (file)
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
  #include <linux/tc_ematch/tc_em_text.h>
  #include <net/pkt_cls.h>
  
-struct text_match
-{
+struct text_match {
         u16                     from_offset;
         u16                     to_offset;
         u8                      from_layer;
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c

index 953f147..797bdb8 100644 (file)
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
         if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
                 return 0;
  
-       return !(((*(__be32*) ptr)  ^ key->val) & key->mask);
+       return !(((*(__be32 *) ptr)  ^ key->val) & key->mask);
  }
  
  static struct tcf_ematch_ops em_u32_ops = {
diff --git a/net/sched/ematch.c b/net/sched/ematch.c

index 5e37da9..88d93eb 100644 (file)
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
  static LIST_HEAD(ematch_ops);
  static DEFINE_RWLOCK(ematch_mod_lock);
  
-static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
  {
         struct tcf_ematch_ops *e = NULL;
  
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
  }
  EXPORT_SYMBOL(tcf_em_unregister);
  
-static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
-                                                  int index)
+static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
+                                                 int index)
  {
         return &tree->matches[index];
  }
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
  
         if (em_hdr->kind == TCF_EM_CONTAINER) {
                 /* Special ematch called "container", carries an index
-                * referencing an external ematch sequence. */
+                * referencing an external ematch sequence.
+                */
                 u32 ref;
  
                 if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                         goto errout;
  
                 /* We do not allow backward jumps to avoid loops and jumps
-                * to our own position are of course illegal. */
+                * to our own position are of course illegal.
+                */
                 if (ref <= idx)
                         goto errout;
  
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                  * which automatically releases the reference again, therefore
                  * the module MUST not be given back under any circumstances
                  * here. Be aware, the destroy function assumes that the
-                * module is held if the ops field is non zero. */
+                * module is held if the ops field is non zero.
+                */
                 em->ops = tcf_em_lookup(em_hdr->kind);
  
                 if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                         if (em->ops) {
                                 /* We dropped the RTNL mutex in order to
                                  * perform the module load. Tell the caller
-                                * to replay the request. */
+                                * to replay the request.
+                                */
                                 module_put(em->ops->owner);
                                 err = -EAGAIN;
                         }
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                 }
  
                 /* ematch module provides expected length of data, so we
-                * can do a basic sanity check. */
+                * can do a basic sanity check.
+                */
                 if (em->ops->datalen && data_len < em->ops->datalen)
                         goto errout;
  
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                          * TCF_EM_SIMPLE may be specified stating that the
                          * data only consists of a u32 integer and the module
                          * does not expected a memory reference but rather
-                        * the value carried. */
+                        * the value carried.
+                        */
                         if (em_hdr->flags & TCF_EM_SIMPLE) {
                                 if (data_len < sizeof(u32))
                                         goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
          * The array of rt attributes is parsed in the order as they are
          * provided, their type must be incremental from 1 to n. Even
          * if it does not serve any real purpose, a failure of sticking
-        * to this policy will result in parsing failure. */
+        * to this policy will result in parsing failure.
+        */
         for (idx = 0; nla_ok(rt_match, list_len); idx++) {
                 err = -EINVAL;
  
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
         /* Check if the number of matches provided by userspace actually
          * complies with the array of matches. The number was used for
          * the validation of references and a mismatch could lead to
-        * undefined references during the matching process. */
+        * undefined references during the matching process.
+        */
         if (idx != tree_hdr->nmatches) {
                 err = -EINVAL;
                 goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
                         .flags = em->flags
                 };
  
-               NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+               NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
  
                 if (em->ops && em->ops->dump) {
                         if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
                                struct tcf_pkt_info *info)
  {
         int r = em->ops->match(skb, em, info);
+
         return tcf_em_is_inverted(em) ? !r : r;
  }
  
@@ -527,8 +536,8 @@ pop_stack:
  
  stack_overflow:
         if (net_ratelimit())
-               printk(KERN_WARNING "tc ematch: local stack overflow,"
-                       " increase NET_EMATCH_STACK\n");
+               pr_warning("tc ematch: local stack overflow,"
+                          " increase NET_EMATCH_STACK\n");
         return -1;
  }
  EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c

index b22ca2d..1507415 100644 (file)
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
         int err = -ENOENT;
  
         write_lock(&qdisc_mod_lock);
-       for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+       for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
                 if (q == qops)
                         break;
         if (q) {
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
         if (!tab || --tab->refcnt)
                 return;
  
-       for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+       for (rtabp = &qdisc_rtab_list;
+            (rtab = *rtabp) != NULL;
+            rtabp = &rtab->next) {
                 if (rtab == tab) {
                         *rtabp = rtab->next;
                         kfree(rtab);
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
         return stab;
  }
  
+static void stab_kfree_rcu(struct rcu_head *head)
+{
+       kfree(container_of(head, struct qdisc_size_table, rcu));
+}
+
  void qdisc_put_stab(struct qdisc_size_table *tab)
  {
         if (!tab)
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
  
         if (--tab->refcnt == 0) {
                 list_del(&tab->list);
-               kfree(tab);
+               call_rcu_bh(&tab->rcu, stab_kfree_rcu);
         }
  
         spin_unlock(&qdisc_stab_lock);
@@ -428,7 +435,7 @@ nla_put_failure:
         return -1;
  }
  
-void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
  {
         int pkt_len, slot;
  
@@ -454,14 +461,13 @@ out:
                 pkt_len = 1;
         qdisc_skb_cb(skb)->pkt_len = pkt_len;
  }
-EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
  
  void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
  {
         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
-               printk(KERN_WARNING
-                      "%s: %s qdisc %X: is non-work-conserving?\n",
-                      txt, qdisc->ops->id, qdisc->handle >> 16);
+               pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+                       txt, qdisc->ops->id, qdisc->handle >> 16);
                 qdisc->flags |= TCQ_F_WARN_NONWC;
         }
  }
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
                                                  timer);
  
-       wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(wd->qdisc);
         __netif_schedule(qdisc_root(wd->qdisc));
  
         return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
                      &qdisc_root_sleeping(wd->qdisc)->state))
                 return;
  
-       wd->qdisc->flags |= TCQ_F_THROTTLED;
+       qdisc_throttled(wd->qdisc);
         time = ktime_set(0, 0);
         time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
         hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
  void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
  {
         hrtimer_cancel(&wd->timer);
-       wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(wd->qdisc);
  }
  EXPORT_SYMBOL(qdisc_watchdog_cancel);
  
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
                         autohandle = TC_H_MAKE(0x80000000U, 0);
         } while (qdisc_lookup(dev, autohandle) && --i > 0);
  
-       return i>0 ? autohandle : 0;
+       return i > 0 ? autohandle : 0;
  }
  
  void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                                 err = PTR_ERR(stab);
                                 goto err_out4;
                         }
-                       sch->stab = stab;
+                       rcu_assign_pointer(sch->stab, stab);
                 }
                 if (tca[TCA_RATE]) {
                         spinlock_t *root_lock;
@@ -874,7 +880,7 @@ err_out4:
          * Any broken qdiscs that would require a ops->reset() here?
          * The qdisc was never in action so it shouldn't be necessary.
          */
-       qdisc_put_stab(sch->stab);
+       qdisc_put_stab(rtnl_dereference(sch->stab));
         if (ops->destroy)
                 ops->destroy(sch);
         goto err_out3;
@@ -882,7 +888,7 @@ err_out4:
  
  static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
  {
-       struct qdisc_size_table *stab = NULL;
+       struct qdisc_size_table *ostab, *stab = NULL;
         int err = 0;
  
         if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
                         return PTR_ERR(stab);
         }
  
-       qdisc_put_stab(sch->stab);
-       sch->stab = stab;
+       ostab = rtnl_dereference(sch->stab);
+       rcu_assign_pointer(sch->stab, stab);
+       qdisc_put_stab(ostab);
  
         if (tca[TCA_RATE]) {
                 /* NB: ignores errors from replace_estimator
@@ -915,9 +922,8 @@ out:
         return 0;
  }
  
-struct check_loop_arg
-{
-       struct qdisc_walker     w;
+struct check_loop_arg {
+       struct qdisc_walker     w;
         struct Qdisc            *p;
         int                     depth;
  };
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
         struct Qdisc *p = NULL;
         int err;
  
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                 return -ENODEV;
  
         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
         if (clid) {
                 if (clid != TC_H_ROOT) {
                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
-                               if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+                               p = qdisc_lookup(dev, TC_H_MAJ(clid));
+                               if (!p)
                                         return -ENOENT;
                                 q = qdisc_leaf(p, clid);
-                       } else { /* ingress */
-                               if (dev_ingress_queue(dev))
-                                       q = dev_ingress_queue(dev)->qdisc_sleeping;
+                       } else if (dev_ingress_queue(dev)) {
+                               q = dev_ingress_queue(dev)->qdisc_sleeping;
                         }
                 } else {
                         q = dev->qdisc;
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
                         return -EINVAL;
         } else {
-               if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+               q = qdisc_lookup(dev, tcm->tcm_handle);
+               if (!q)
                         return -ENOENT;
         }
  
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                         return -EINVAL;
                 if (q->handle == 0)
                         return -ENOENT;
-               if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+               err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+               if (err != 0)
                         return err;
         } else {
                 qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
  }
  
  /*
-   Create/change qdisc.
+ * Create/change qdisc.
   */
  
  static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@ replay:
         clid = tcm->tcm_parent;
         q = p = NULL;
  
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                 return -ENODEV;
  
         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@ replay:
         if (clid) {
                 if (clid != TC_H_ROOT) {
                         if (clid != TC_H_INGRESS) {
-                               if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+                               p = qdisc_lookup(dev, TC_H_MAJ(clid));
+                               if (!p)
                                         return -ENOENT;
                                 q = qdisc_leaf(p, clid);
-                       } else { /* ingress */
-                               if (dev_ingress_queue_create(dev))
-                                       q = dev_ingress_queue(dev)->qdisc_sleeping;
+                       } else if (dev_ingress_queue_create(dev)) {
+                               q = dev_ingress_queue(dev)->qdisc_sleeping;
                         }
                 } else {
                         q = dev->qdisc;
@@ -1063,13 +1073,14 @@ replay:
  
                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
                         if (tcm->tcm_handle) {
-                               if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+                               if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
                                         return -EEXIST;
                                 if (TC_H_MIN(tcm->tcm_handle))
                                         return -EINVAL;
-                               if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+                               q = qdisc_lookup(dev, tcm->tcm_handle);
+                               if (!q)
                                         goto create_n_graft;
-                               if (n->nlmsg_flags&NLM_F_EXCL)
+                               if (n->nlmsg_flags & NLM_F_EXCL)
                                         return -EEXIST;
                                 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                                         return -EINVAL;
@@ -1079,7 +1090,7 @@ replay:
                                 atomic_inc(&q->refcnt);
                                 goto graft;
                         } else {
-                               if (q == NULL)
+                               if (!q)
                                         goto create_n_graft;
  
                                 /* This magic test requires explanation.
@@ -1101,9 +1112,9 @@ replay:
                                  *   For now we select create/graft, if
                                  *   user gave KIND, which does not match existing.
                                  */
-                               if ((n->nlmsg_flags&NLM_F_CREATE) &&
-                                   (n->nlmsg_flags&NLM_F_REPLACE) &&
-                                   ((n->nlmsg_flags&NLM_F_EXCL) ||
+                               if ((n->nlmsg_flags & NLM_F_CREATE) &&
+                                   (n->nlmsg_flags & NLM_F_REPLACE) &&
+                                   ((n->nlmsg_flags & NLM_F_EXCL) ||
                                      (tca[TCA_KIND] &&
                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
                                         goto create_n_graft;
@@ -1118,7 +1129,7 @@ replay:
         /* Change qdisc parameters */
         if (q == NULL)
                 return -ENOENT;
-       if (n->nlmsg_flags&NLM_F_EXCL)
+       if (n->nlmsg_flags & NLM_F_EXCL)
                 return -EEXIST;
         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                 return -EINVAL;
@@ -1128,7 +1139,7 @@ replay:
         return err;
  
  create_n_graft:
-       if (!(n->nlmsg_flags&NLM_F_CREATE))
+       if (!(n->nlmsg_flags & NLM_F_CREATE))
                 return -ENOENT;
         if (clid == TC_H_INGRESS) {
                 if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
         struct nlmsghdr  *nlh;
         unsigned char *b = skb_tail_pointer(skb);
         struct gnet_dump d;
+       struct qdisc_size_table *stab;
  
         nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
         tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                 goto nla_put_failure;
         q->qstats.qlen = q->q.qlen;
  
-       if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+       stab = rtnl_dereference(q->stab);
+       if (stab && qdisc_dump_stab(skb, stab) < 0)
                 goto nla_put_failure;
  
         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
                 return -ENOBUFS;
  
         if (old && !tc_qdisc_dump_ignore(old)) {
-               if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+               if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+                                 0, RTM_DELQDISC) < 0)
                         goto err_out;
         }
         if (new && !tc_qdisc_dump_ignore(new)) {
-               if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+               if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+                                 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
                         goto err_out;
         }
  
         if (skb->len)
-               return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+               return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+                                     n->nlmsg_flags & NLM_F_ECHO);
  
  err_out:
         kfree_skb(skb);
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                         q_idx++;
                         continue;
                 }
-               if (!tc_qdisc_dump_ignore(q) && 
+               if (!tc_qdisc_dump_ignore(q) &&
                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
                                   cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
                         goto done;
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
         u32 qid = TC_H_MAJ(clid);
         int err;
  
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                 return -ENODEV;
  
         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                         qid = dev->qdisc->handle;
  
                 /* Now qid is genuine qdisc handle consistent
-                  both with parent and child.
-
-                  TC_H_MAJ(pid) still may be unspecified, complete it now.
+                * both with parent and child.
+                *
+                * TC_H_MAJ(pid) still may be unspecified, complete it now.
                  */
                 if (pid)
                         pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
         }
  
         /* OK. Locate qdisc */
-       if ((q = qdisc_lookup(dev, qid)) == NULL)
+       q = qdisc_lookup(dev, qid);
+       if (!q)
                 return -ENOENT;
  
         /* An check that it supports classes */
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
  
         if (cl == 0) {
                 err = -ENOENT;
-               if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+               if (n->nlmsg_type != RTM_NEWTCLASS ||
+                   !(n->nlmsg_flags & NLM_F_CREATE))
                         goto out;
         } else {
                 switch (n->nlmsg_type) {
                 case RTM_NEWTCLASS:
                         err = -EEXIST;
-                       if (n->nlmsg_flags&NLM_F_EXCL)
+                       if (n->nlmsg_flags & NLM_F_EXCL)
                                 goto out;
                         break;
                 case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
                 return -EINVAL;
         }
  
-       return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+                             n->nlmsg_flags & NLM_F_ECHO);
  }
  
-struct qdisc_dump_args
-{
-       struct qdisc_walker w;
-       struct sk_buff *skb;
-       struct netlink_callback *cb;
+struct qdisc_dump_args {
+       struct qdisc_walker     w;
+       struct sk_buff          *skb;
+       struct netlink_callback *cb;
  };
  
  static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
  
  static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
  {
-       struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+       struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
         struct net *net = sock_net(skb->sk);
         struct netdev_queue *dev_queue;
         struct net_device *dev;
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
  
         if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                 return 0;
-       if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                 return 0;
  
         s_t = cb->args[0];
@@ -1621,19 +1641,22 @@ done:
  }
  
  /* Main classifier routine: scans classifier chain attached
-   to this qdisc, (optionally) tests for protocol and asks
-   specific classifiers.
+ * to this qdisc, (optionally) tests for protocol and asks
+ * specific classifiers.
   */
  int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
                        struct tcf_result *res)
  {
         __be16 protocol = skb->protocol;
-       int err = 0;
+       int err;
  
         for (; tp; tp = tp->next) {
-               if ((tp->protocol == protocol ||
-                    tp->protocol == htons(ETH_P_ALL)) &&
-                   (err = tp->classify(skb, tp, res)) >= 0) {
+               if (tp->protocol != protocol &&
+                   tp->protocol != htons(ETH_P_ALL))
+                       continue;
+               err = tp->classify(skb, tp, res);
+
+               if (err >= 0) {
  #ifdef CONFIG_NET_CLS_ACT
                         if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
                                 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1664,11 +1687,11 @@ reclassify:
  
                 if (verd++ >= MAX_REC_LOOP) {
                         if (net_ratelimit())
-                               printk(KERN_NOTICE
-                                      "%s: packet reclassify loop"
+                               pr_notice("%s: packet reclassify loop"
                                           " rule prio %u protocol %02x\n",
-                                      tp->q->ops->id,
-                                      tp->prio & 0xffff, ntohs(tp->protocol));
+                                         tp->q->ops->id,
+                                         tp->prio & 0xffff,
+                                         ntohs(tp->protocol));
                         return TC_ACT_SHOT;
                 }
                 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1761,7 +1784,7 @@ static int __init pktsched_init(void)
  
         err = register_pernet_subsys(&psched_net_ops);
         if (err) {
-               printk(KERN_ERR "pktsched_init: "
+               pr_err("pktsched_init: "
                        "cannot initialize per netns operations\n");
                 return err;
         }
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c

index 943d733..3f08158 100644 (file)
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
          * creation), and one for the reference held when calling delete.
          */
         if (flow->ref < 2) {
-               printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+               pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
                 return -EINVAL;
         }
         if (flow->ref > 2)
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
                         }
                 }
                 flow = NULL;
-       done:
-               ;               
+done:
+               ;
         }
-       if (!flow)
+       if (!flow) {
                 flow = &p->link;
-       else {
+       } else {
                 if (flow->vcc)
                         ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
                 /*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
  
         list_for_each_entry_safe(flow, tmp, &p->flows, list) {
                 if (flow->ref > 1)
-                       printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
-                              flow->ref);
+                       pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
                 atm_tc_put(sch, (unsigned long)flow);
         }
         tasklet_kill(&p->task);
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
         }
         if (flow->excess)
                 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
-       else {
+       else
                 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
-       }
  
         nla_nest_end(skb, nest);
         return skb->len;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c

index 5f63ec5..24d94c0 100644 (file)
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
  struct cbq_sched_data;
  
  
-struct cbq_class
-{
+struct cbq_class {
         struct Qdisc_class_common common;
         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  
@@ -139,19 +138,18 @@ struct cbq_class
         int                     refcnt;
         int                     filters;
  
-       struct cbq_class        *defaults[TC_PRIO_MAX+1];
+       struct cbq_class        *defaults[TC_PRIO_MAX + 1];
  };
  
-struct cbq_sched_data
-{
+struct cbq_sched_data {
         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
-       int                     nclasses[TC_CBQ_MAXPRIO+1];
-       unsigned                quanta[TC_CBQ_MAXPRIO+1];
+       int                     nclasses[TC_CBQ_MAXPRIO + 1];
+       unsigned int            quanta[TC_CBQ_MAXPRIO + 1];
  
         struct cbq_class        link;
  
-       unsigned                activemask;
-       struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
+       unsigned int            activemask;
+       struct cbq_class        *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
                                                                    with backlog */
  
  #ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
         int                     tx_len;
         psched_time_t           now;            /* Cached timestamp */
         psched_time_t           now_rt;         /* Cached real time */
-       unsigned                pmask;
+       unsigned int            pmask;
  
         struct hrtimer          delay_timer;
         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
  };
  
  
-#define L2T(cl,len)    qdisc_l2t((cl)->R_tab,len)
+#define L2T(cl, len)   qdisc_l2t((cl)->R_tab, len)
  
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
  cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
  {
         struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
  static struct cbq_class *
  cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
  {
-       struct cbq_class *cl, *new;
+       struct cbq_class *cl;
  
-       for (cl = this->tparent; cl; cl = cl->tparent)
-               if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
-                       return new;
+       for (cl = this->tparent; cl; cl = cl->tparent) {
+               struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
  
+               if (new != NULL && new != this)
+                       return new;
+       }
         return NULL;
  }
  
  #endif
  
  /* Classify packet. The procedure is pretty complicated, but
-   it allows us to combine link sharing and priority scheduling
-   transparently.
-
-   Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
-   so that it resolves to split nodes. Then packets are classified
-   by logical priority, or a more specific classifier may be attached
-   to the split node.
+ * it allows us to combine link sharing and priority scheduling
+ * transparently.
+ *
+ * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
+ * so that it resolves to split nodes. Then packets are classified
+ * by logical priority, or a more specific classifier may be attached
+ * to the split node.
   */
  
  static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
         /*
          *  Step 1. If skb->priority points to one of our classes, use it.
          */
-       if (TC_H_MAJ(prio^sch->handle) == 0 &&
+       if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
             (cl = cbq_class_lookup(q, prio)) != NULL)
                 return cl;
  
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
                         goto fallback;
  
-               if ((cl = (void*)res.class) == NULL) {
+               cl = (void *)res.class;
+               if (!cl) {
                         if (TC_H_MAJ(res.classid))
                                 cl = cbq_class_lookup(q, res.classid);
-                       else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+                       else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
                                 cl = defmap[TC_PRIO_BESTEFFORT];
  
                         if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
          * Step 4. No success...
          */
         if (TC_H_MAJ(prio) == 0 &&
-           !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+           !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
                 return head;
  
@@ -290,12 +291,12 @@ fallback:
  }
  
  /*
-   A packet has just been enqueued on the empty class.
-   cbq_activate_class adds it to the tail of active class list
-   of its priority band.
+ * A packet has just been enqueued on the empty class.
+ * cbq_activate_class adds it to the tail of active class list
+ * of its priority band.
   */
  
-static __inline__ void cbq_activate_class(struct cbq_class *cl)
+static inline void cbq_activate_class(struct cbq_class *cl)
  {
         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
         int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
  }
  
  /*
-   Unlink class from active chain.
-   Note that this same procedure is done directly in cbq_dequeue*
-   during round-robin procedure.
+ * Unlink class from active chain.
+ * Note that this same procedure is done directly in cbq_dequeue*
+ * during round-robin procedure.
   */
  
  static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
  {
         int toplevel = q->toplevel;
  
-       if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+       if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
                 psched_time_t now;
                 psched_tdiff_t incr;
  
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
                                 q->toplevel = cl->level;
                                 return;
                         }
-               } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+               } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
         }
  }
  
@@ -417,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
                 delay += cl->offtime;
  
                 /*
-                  Class goes to sleep, so that it will have no
-                  chance to work avgidle. Let's forgive it 8)
-
-                  BTW cbq-2.0 has a crap in this
-                  place, apparently they forgot to shift it by cl->ewma_log.
+                * Class goes to sleep, so that it will have no
+                * chance to work avgidle. Let's forgive it 8)
+                *
+                * BTW cbq-2.0 has a crap in this
+                * place, apparently they forgot to shift it by cl->ewma_log.
                  */
                 if (cl->avgidle < 0)
                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -438,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
                 q->wd_expires = delay;
  
         /* Dirty work! We must schedule wakeups based on
-          real available rate, rather than leaf rate,
-          which may be tiny (even zero).
+        * real available rate, rather than leaf rate,
+        * which may be tiny (even zero).
          */
         if (q->toplevel == TC_CBQ_MAXLEVEL) {
                 struct cbq_class *b;
@@ -459,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
  }
  
  /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
-   they go overlimit
+ * they go overlimit
   */
  
  static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -594,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
         struct Qdisc *sch = q->watchdog.qdisc;
         psched_time_t now;
         psched_tdiff_t delay = 0;
-       unsigned pmask;
+       unsigned int pmask;
  
         now = psched_get_time();
  
@@ -623,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
         }
  
-       sch->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(sch);
         __netif_schedule(qdisc_root(sch));
         return HRTIMER_NORESTART;
  }
@@ -663,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
  #endif
  
  /*
-   It is mission critical procedure.
-
-   We "regenerate" toplevel cutoff, if transmitting class
-   has backlog and it is not regulated. It is not part of
-   original CBQ description, but looks more reasonable.
-   Probably, it is wrong. This question needs further investigation.
-*/
+ * It is mission critical procedure.
+ *
+ * We "regenerate" toplevel cutoff, if transmitting class
+ * has backlog and it is not regulated. It is not part of
+ * original CBQ description, but looks more reasonable.
+ * Probably, it is wrong. This question needs further investigation.
+ */
  
-static __inline__ void
+static inline void
  cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                     struct cbq_class *borrowed)
  {
@@ -682,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                                         q->toplevel = borrowed->level;
                                         return;
                                 }
-                       } while ((borrowed=borrowed->borrow) != NULL);
+                       } while ((borrowed = borrowed->borrow) != NULL);
                 }
  #if 0
         /* It is not necessary now. Uncommenting it
@@ -710,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
                 cl->bstats.bytes += len;
  
                 /*
-                  (now - last) is total time between packet right edges.
-                  (last_pktlen/rate) is "virtual" busy time, so that
-
-                        idle = (now - last) - last_pktlen/rate
+                * (now - last) is total time between packet right edges.
+                * (last_pktlen/rate) is "virtual" busy time, so that
+                *
+                *      idle = (now - last) - last_pktlen/rate
                  */
  
                 idle = q->now - cl->last;
@@ -723,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
                         idle -= L2T(cl, len);
  
                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
-                  where W=2^{-ewma_log}. But cl->avgidle is scaled:
-                  cl->avgidle == true_avgidle/W,
-                  hence:
+                * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+                * cl->avgidle == true_avgidle/W,
+                * hence:
                  */
                         avgidle += idle - (avgidle>>cl->ewma_log);
                 }
@@ -739,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
                         cl->avgidle = avgidle;
  
                         /* Calculate expected time, when this class
-                          will be allowed to send.
-                          It will occur, when:
-                          (1-W)*true_avgidle + W*delay = 0, i.e.
-                          idle = (1/W - 1)*(-true_avgidle)
-                          or
-                          idle = (1 - W)*(-cl->avgidle);
+                        * will be allowed to send.
+                        * It will occur, when:
+                        * (1-W)*true_avgidle + W*delay = 0, i.e.
+                        * idle = (1/W - 1)*(-true_avgidle)
+                        * or
+                        * idle = (1 - W)*(-cl->avgidle);
                          */
                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
  
                         /*
-                          That is not all.
-                          To maintain the rate allocated to the class,
-                          we add to undertime virtual clock,
-                          necessary to complete transmitted packet.
-                          (len/phys_bandwidth has been already passed
-                          to the moment of cbq_update)
+                        * That is not all.
+                        * To maintain the rate allocated to the class,
+                        * we add to undertime virtual clock,
+                        * necessary to complete transmitted packet.
+                        * (len/phys_bandwidth has been already passed
+                        * to the moment of cbq_update)
                          */
  
                         idle -= L2T(&q->link, len);
@@ -776,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
         cbq_update_toplevel(q, this, q->tx_borrowed);
  }
  
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
  cbq_under_limit(struct cbq_class *cl)
  {
         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -792,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
  
         do {
                 /* It is very suspicious place. Now overlimit
-                  action is generated for not bounded classes
-                  only if link is completely congested.
-                  Though it is in agree with ancestor-only paradigm,
-                  it looks very stupid. Particularly,
-                  it means that this chunk of code will either
-                  never be called or result in strong amplification
-                  of burstiness. Dangerous, silly, and, however,
-                  no another solution exists.
+                * action is generated for not bounded classes
+                * only if link is completely congested.
+                * Though it is in agree with ancestor-only paradigm,
+                * it looks very stupid. Particularly,
+                * it means that this chunk of code will either
+                * never be called or result in strong amplification
+                * of burstiness. Dangerous, silly, and, however,
+                * no another solution exists.
                  */
-               if ((cl = cl->borrow) == NULL) {
+               cl = cl->borrow;
+               if (!cl) {
                         this_cl->qstats.overlimits++;
                         this_cl->overlimit(this_cl);
                         return NULL;
@@ -814,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
         return cl;
  }
  
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
  cbq_dequeue_prio(struct Qdisc *sch, int prio)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
@@ -838,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
  
                         if (cl->deficit <= 0) {
                                 /* Class exhausted its allotment per
-                                  this round. Switch to the next one.
+                                * this round. Switch to the next one.
                                  */
                                 deficit = 1;
                                 cl->deficit += cl->quantum;
@@ -848,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
                         skb = cl->q->dequeue(cl->q);
  
                         /* Class did not give us any skb :-(
-                          It could occur even if cl->q->q.qlen != 0
-                          f.e. if cl->q == "tbf"
+                        * It could occur even if cl->q->q.qlen != 0
+                        * f.e. if cl->q == "tbf"
                          */
                         if (skb == NULL)
                                 goto skip_class;
@@ -878,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
  skip_class:
                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
                                 /* Class is empty or penalized.
-                                  Unlink it from active chain.
+                                * Unlink it from active chain.
                                  */
                                 cl_prev->next_alive = cl->next_alive;
                                 cl->next_alive = NULL;
@@ -917,14 +919,14 @@ next_class:
         return NULL;
  }
  
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
  cbq_dequeue_1(struct Qdisc *sch)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
         struct sk_buff *skb;
-       unsigned activemask;
+       unsigned int activemask;
  
-       activemask = q->activemask&0xFF;
+       activemask = q->activemask & 0xFF;
         while (activemask) {
                 int prio = ffz(~activemask);
                 activemask &= ~(1<<prio);
@@ -949,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
         if (q->tx_class) {
                 psched_tdiff_t incr2;
                 /* Time integrator. We calculate EOS time
-                  by adding expected packet transmission time.
-                  If real time is greater, we warp artificial clock,
-                  so that:
-
-                  cbq_time = max(real_time, work);
+                * by adding expected packet transmission time.
+                * If real time is greater, we warp artificial clock,
+                * so that:
+                *
+                * cbq_time = max(real_time, work);
                  */
                 incr2 = L2T(&q->link, q->tx_len);
                 q->now += incr2;
@@ -971,27 +973,27 @@ cbq_dequeue(struct Qdisc *sch)
                 if (skb) {
                         qdisc_bstats_update(sch, skb);
                         sch->q.qlen--;
-                       sch->flags &= ~TCQ_F_THROTTLED;
+                       qdisc_unthrottled(sch);
                         return skb;
                 }
  
                 /* All the classes are overlimit.
-
-                  It is possible, if:
-
-                  1. Scheduler is empty.
-                  2. Toplevel cutoff inhibited borrowing.
-                  3. Root class is overlimit.
-
-                  Reset 2d and 3d conditions and retry.
-
-                  Note, that NS and cbq-2.0 are buggy, peeking
-                  an arbitrary class is appropriate for ancestor-only
-                  sharing, but not for toplevel algorithm.
-
-                  Our version is better, but slower, because it requires
-                  two passes, but it is unavoidable with top-level sharing.
-               */
+                *
+                * It is possible, if:
+                *
+                * 1. Scheduler is empty.
+                * 2. Toplevel cutoff inhibited borrowing.
+                * 3. Root class is overlimit.
+                *
+                * Reset 2d and 3d conditions and retry.
+                *
+                * Note, that NS and cbq-2.0 are buggy, peeking
+                * an arbitrary class is appropriate for ancestor-only
+                * sharing, but not for toplevel algorithm.
+                *
+                * Our version is better, but slower, because it requires
+                * two passes, but it is unavoidable with top-level sharing.
+                */
  
                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
                     q->link.undertime == PSCHED_PASTPERFECT)
@@ -1002,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
         }
  
         /* No packets in scheduler or nobody wants to give them to us :-(
-          Sigh... start watchdog timer in the last case. */
+        * Sigh... start watchdog timer in the last case.
+        */
  
         if (sch->q.qlen) {
                 sch->qstats.overlimits++;
@@ -1024,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
                 int level = 0;
                 struct cbq_class *cl;
  
-               if ((cl = this->children) != NULL) {
+               cl = this->children;
+               if (cl) {
                         do {
                                 if (cl->level > level)
                                         level = cl->level;
                         } while ((cl = cl->sibling) != this->children);
                 }
-               this->level = level+1;
+               this->level = level + 1;
         } while ((this = this->tparent) != NULL);
  }
  
@@ -1046,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
         for (h = 0; h < q->clhash.hashsize; h++) {
                 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                         /* BUGGGG... Beware! This expression suffer of
-                          arithmetic overflows!
+                        * arithmetic overflows!
                          */
                         if (cl->priority == prio) {
                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
                                         q->quanta[prio];
                         }
                         if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
-                               printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+                               pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+                                          cl->common.classid, cl->quantum);
                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
                         }
                 }
@@ -1064,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
  {
         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
         struct cbq_class *split = cl->split;
-       unsigned h;
+       unsigned int h;
         int i;
  
         if (split == NULL)
                 return;
  
-       for (i=0; i<=TC_PRIO_MAX; i++) {
-               if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+       for (i = 0; i <= TC_PRIO_MAX; i++) {
+               if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
                         split->defaults[i] = NULL;
         }
  
-       for (i=0; i<=TC_PRIO_MAX; i++) {
+       for (i = 0; i <= TC_PRIO_MAX; i++) {
                 int level = split->level;
  
                 if (split->defaults[i])
@@ -1088,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
                         hlist_for_each_entry(c, n, &q->clhash.hash[h],
                                              common.hnode) {
                                 if (c->split == split && c->level < level &&
-                                   c->defmap&(1<<i)) {
+                                   c->defmap & (1<<i)) {
                                         split->defaults[i] = c;
                                         level = c->level;
                                 }
@@ -1102,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
         struct cbq_class *split = NULL;
  
         if (splitid == 0) {
-               if ((split = cl->split) == NULL)
+               split = cl->split;
+               if (!split)
                         return;
                 splitid = split->common.classid;
         }
@@ -1120,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
                 cl->defmap = 0;
                 cbq_sync_defmap(cl);
                 cl->split = split;
-               cl->defmap = def&mask;
+               cl->defmap = def & mask;
         } else
-               cl->defmap = (cl->defmap&~mask)|(def&mask);
+               cl->defmap = (cl->defmap & ~mask) | (def & mask);
  
         cbq_sync_defmap(cl);
  }
@@ -1135,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
         qdisc_class_hash_remove(&q->clhash, &this->common);
  
         if (this->tparent) {
-               clp=&this->sibling;
+               clp = &this->sibling;
                 cl = *clp;
                 do {
                         if (cl == this) {
@@ -1174,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
         }
  }
  
-static unsigned int cbq_drop(struct Qdisc* sch)
+static unsigned int cbq_drop(struct Qdisc *sch)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
         struct cbq_class *cl, *cl_head;
@@ -1182,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
         unsigned int len;
  
         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
-               if ((cl_head = q->active[prio]) == NULL)
+               cl_head = q->active[prio];
+               if (!cl_head)
                         continue;
  
                 cl = cl_head;
@@ -1199,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
  }
  
  static void
-cbq_reset(struct Qdisc* sch)
+cbq_reset(struct Qdisc *sch)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
         struct cbq_class *cl;
         struct hlist_node *n;
         int prio;
-       unsigned h;
+       unsigned int h;
  
         q->activemask = 0;
         q->pmask = 0;
@@ -1237,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
  
  static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
  {
-       if (lss->change&TCF_CBQ_LSS_FLAGS) {
-               cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
-               cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+       if (lss->change & TCF_CBQ_LSS_FLAGS) {
+               cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+               cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
         }
-       if (lss->change&TCF_CBQ_LSS_EWMA)
+       if (lss->change & TCF_CBQ_LSS_EWMA)
                 cl->ewma_log = lss->ewma_log;
-       if (lss->change&TCF_CBQ_LSS_AVPKT)
+       if (lss->change & TCF_CBQ_LSS_AVPKT)
                 cl->avpkt = lss->avpkt;
-       if (lss->change&TCF_CBQ_LSS_MINIDLE)
+       if (lss->change & TCF_CBQ_LSS_MINIDLE)
                 cl->minidle = -(long)lss->minidle;
-       if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+       if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
                 cl->maxidle = lss->maxidle;
                 cl->avgidle = lss->maxidle;
         }
-       if (lss->change&TCF_CBQ_LSS_OFFTIME)
+       if (lss->change & TCF_CBQ_LSS_OFFTIME)
                 cl->offtime = lss->offtime;
         return 0;
  }
@@ -1279,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
         if (wrr->weight)
                 cl->weight = wrr->weight;
         if (wrr->priority) {
-               cl->priority = wrr->priority-1;
+               cl->priority = wrr->priority - 1;
                 cl->cpriority = cl->priority;
                 if (cl->priority >= cl->priority2)
-                       cl->priority2 = TC_CBQ_MAXPRIO-1;
+                       cl->priority2 = TC_CBQ_MAXPRIO - 1;
         }
  
         cbq_addprio(q, cl);
@@ -1299,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
                 cl->overlimit = cbq_ovl_delay;
                 break;
         case TC_CBQ_OVL_LOWPRIO:
-               if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
-                   ovl->priority2-1 <= cl->priority)
+               if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
+                   ovl->priority2 - 1 <= cl->priority)
                         return -EINVAL;
-               cl->priority2 = ovl->priority2-1;
+               cl->priority2 = ovl->priority2 - 1;
                 cl->overlimit = cbq_ovl_lowprio;
                 break;
         case TC_CBQ_OVL_DROP:
@@ -1381,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
         if (!q->link.q)
                 q->link.q = &noop_qdisc;
  
-       q->link.priority = TC_CBQ_MAXPRIO-1;
-       q->link.priority2 = TC_CBQ_MAXPRIO-1;
-       q->link.cpriority = TC_CBQ_MAXPRIO-1;
+       q->link.priority = TC_CBQ_MAXPRIO - 1;
+       q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+       q->link.cpriority = TC_CBQ_MAXPRIO - 1;
         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
         q->link.overlimit = cbq_ovl_classic;
         q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1414,7 +1421,7 @@ put_rtab:
         return err;
  }
  
-static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
  {
         unsigned char *b = skb_tail_pointer(skb);
  
@@ -1426,7 +1433,7 @@ nla_put_failure:
         return -1;
  }
  
-static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tc_cbq_lssopt opt;
@@ -1451,15 +1458,15 @@ nla_put_failure:
         return -1;
  }
  
-static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tc_cbq_wrropt opt;
  
         opt.flags = 0;
         opt.allot = cl->allot;
-       opt.priority = cl->priority+1;
-       opt.cpriority = cl->cpriority+1;
+       opt.priority = cl->priority + 1;
+       opt.cpriority = cl->cpriority + 1;
         opt.weight = cl->weight;
         NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
         return skb->len;
@@ -1469,13 +1476,13 @@ nla_put_failure:
         return -1;
  }
  
-static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tc_cbq_ovl opt;
  
         opt.strategy = cl->ovl_strategy;
-       opt.priority2 = cl->priority2+1;
+       opt.priority2 = cl->priority2 + 1;
         opt.pad = 0;
         opt.penalty = cl->penalty;
         NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1486,7 +1493,7 @@ nla_put_failure:
         return -1;
  }
  
-static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tc_cbq_fopt opt;
@@ -1505,7 +1512,7 @@ nla_put_failure:
  }
  
  #ifdef CONFIG_NET_CLS_ACT
-static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
  {
         unsigned char *b = skb_tail_pointer(skb);
         struct tc_cbq_police opt;
@@ -1569,7 +1576,7 @@ static int
  cbq_dump_class(struct Qdisc *sch, unsigned long arg,
                struct sk_buff *skb, struct tcmsg *tcm)
  {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
         struct nlattr *nest;
  
         if (cl->tparent)
@@ -1597,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
         struct gnet_dump *d)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
  
         cl->qstats.qlen = cl->q->q.qlen;
         cl->xstats.avgidle = cl->avgidle;
@@ -1617,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
  static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                      struct Qdisc **old)
  {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
  
         if (new == NULL) {
                 new = qdisc_create_dflt(sch->dev_queue,
@@ -1640,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
         return 0;
  }
  
-static struct Qdisc *
-cbq_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
  {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
  
         return cl->q;
  }
@@ -1682,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
                 kfree(cl);
  }
  
-static void
-cbq_destroy(struct Qdisc* sch)
+static void cbq_destroy(struct Qdisc *sch)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
         struct hlist_node *n, *next;
         struct cbq_class *cl;
-       unsigned h;
+       unsigned int h;
  
  #ifdef CONFIG_NET_CLS_ACT
         q->rx_class = NULL;
@@ -1712,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
  
  static void cbq_put(struct Qdisc *sch, unsigned long arg)
  {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
  
         if (--cl->refcnt == 0) {
  #ifdef CONFIG_NET_CLS_ACT
@@ -1735,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
  {
         int err;
         struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *cl = (struct cbq_class*)*arg;
+       struct cbq_class *cl = (struct cbq_class *)*arg;
         struct nlattr *opt = tca[TCA_OPTIONS];
         struct nlattr *tb[TCA_CBQ_MAX + 1];
         struct cbq_class *parent;
@@ -1827,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
  
         if (classid) {
                 err = -EINVAL;
-               if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+               if (TC_H_MAJ(classid ^ sch->handle) ||
+                   cbq_class_lookup(q, classid))
                         goto failure;
         } else {
                 int i;
-               classid = TC_H_MAKE(sch->handle,0x8000);
+               classid = TC_H_MAKE(sch->handle, 0x8000);
  
-               for (i=0; i<0x8000; i++) {
+               for (i = 0; i < 0x8000; i++) {
                         if (++q->hgenerator >= 0x8000)
                                 q->hgenerator = 1;
                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1890,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
         cl->minidle = -0x7FFFFFFF;
         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
-       if (cl->ewma_log==0)
+       if (cl->ewma_log == 0)
                 cl->ewma_log = q->link.ewma_log;
-       if (cl->maxidle==0)
+       if (cl->maxidle == 0)
                 cl->maxidle = q->link.maxidle;
-       if (cl->avpkt==0)
+       if (cl->avpkt == 0)
                 cl->avpkt = q->link.avpkt;
         cl->overlimit = cbq_ovl_classic;
         if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1920,7 +1926,7 @@ failure:
  static int cbq_delete(struct Qdisc *sch, unsigned long arg)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
         unsigned int qlen;
  
         if (cl->filters || cl->children || cl == &q->link)
@@ -1978,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
                                      u32 classid)
  {
         struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *p = (struct cbq_class*)parent;
+       struct cbq_class *p = (struct cbq_class *)parent;
         struct cbq_class *cl = cbq_class_lookup(q, classid);
  
         if (cl) {
@@ -1992,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
  
  static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
  {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
  
         cl->filters--;
  }
@@ -2002,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
         struct cbq_sched_data *q = qdisc_priv(sch);
         struct cbq_class *cl;
         struct hlist_node *n;
-       unsigned h;
+       unsigned int h;
  
         if (arg->stop)
                 return;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c

index 0f7bf3f..2c79020 100644 (file)
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
                 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
  
         if (tb[TCA_DSMARK_VALUE])
-               p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+               p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
  
         if (tb[TCA_DSMARK_MASK])
-               p->mask[*arg-1] = mask;
+               p->mask[*arg - 1] = mask;
  
         err = 0;
  
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
         if (!dsmark_valid_index(p, arg))
                 return -EINVAL;
  
-       p->mask[arg-1] = 0xff;
-       p->value[arg-1] = 0;
+       p->mask[arg - 1] = 0xff;
+       p->value[arg - 1] = 0;
  
         return 0;
  }
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
                 if (p->mask[i] == 0xff && !p->value[i])
                         goto ignore;
                 if (walker->count >= walker->skip) {
-                       if (walker->fn(sch, i+1, walker) < 0) {
+                       if (walker->fn(sch, i + 1, walker) < 0) {
                                 walker->stop = 1;
                                 break;
                         }
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
                  * and don't need yet another qdisc as a bypass.
                  */
                 if (p->mask[index] != 0xff || p->value[index])
-                       printk(KERN_WARNING
-                              "dsmark_dequeue: unsupported protocol %d\n",
-                              ntohs(skb->protocol));
+                       pr_warning("dsmark_dequeue: unsupported protocol %d\n",
+                                  ntohs(skb->protocol));
                 break;
         }
  
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
         if (!dsmark_valid_index(p, cl))
                 return -EINVAL;
  
-       tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+       tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
         tcm->tcm_info = p->q->handle;
  
         opts = nla_nest_start(skb, TCA_OPTIONS);
         if (opts == NULL)
                 goto nla_put_failure;
-       NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
-       NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+       NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
+       NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
  
         return nla_nest_end(skb, opts);
  
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c

index d468b47..be33f9d 100644 (file)
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,12 +19,11 @@
  
  /* 1 band FIFO pseudo-"scheduler" */
  
-struct fifo_sched_data
-{
+struct fifo_sched_data {
         u32 limit;
  };
  
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
         struct fifo_sched_data *q = qdisc_priv(sch);
  
@@ -34,7 +33,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
         return qdisc_reshape_fail(skb, sch);
  }
  
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
         struct fifo_sched_data *q = qdisc_priv(sch);
  
@@ -44,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
         return qdisc_reshape_fail(skb, sch);
  }
  
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
         struct fifo_sched_data *q = qdisc_priv(sch);
  
@@ -62,11 +61,13 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
  {
         struct fifo_sched_data *q = qdisc_priv(sch);
+       bool bypass;
+       bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
  
         if (opt == NULL) {
                 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
  
-               if (sch->ops == &bfifo_qdisc_ops)
+               if (is_bfifo)
                         limit *= psched_mtu(qdisc_dev(sch));
  
                 q->limit = limit;
@@ -79,6 +80,15 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
                 q->limit = ctl->limit;
         }
  
+       if (is_bfifo)
+               bypass = q->limit >= psched_mtu(qdisc_dev(sch));
+       else
+               bypass = q->limit >= 1;
+
+       if (bypass)
+               sch->flags |= TCQ_F_CAN_BYPASS;
+       else
+               sch->flags &= ~TCQ_F_CAN_BYPASS;
         return 0;
  }
  
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c

index 34dc598..0da09d5 100644 (file)
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
                  */
                 kfree_skb(skb);
                 if (net_ratelimit())
-                       printk(KERN_WARNING "Dead loop on netdevice %s, "
-                              "fix it urgently!\n", dev_queue->dev->name);
+                       pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
+                                  dev_queue->dev->name);
                 ret = qdisc_qlen(q);
         } else {
                 /*
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
         } else {
                 /* Driver returned NETDEV_TX_BUSY - requeue skb */
                 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-                       printk(KERN_WARNING "BUG %s code %d qlen %d\n",
-                              dev->name, ret, q->q.qlen);
+                       pr_warning("BUG %s code %d qlen %d\n",
+                                  dev->name, ret, q->q.qlen);
  
                 ret = dev_requeue_skb(skb, q);
         }
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = {
  };
  
  
-static const u8 prio2band[TC_PRIO_MAX+1] =
-       { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+       1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
  
  /* 3-band FIFO queue: old style, but should be a bit faster than
     generic prio+fifo combination.
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
         return priv->q + band;
  }
  
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
  {
         if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
                 int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
         return qdisc_drop(skb, qdisc);
  }
  
-static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
  {
         struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
         int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
         return NULL;
  }
  
-static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
  {
         struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
         int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
         return NULL;
  }
  
-static void pfifo_fast_reset(struct Qdisc* qdisc)
+static void pfifo_fast_reset(struct Qdisc *qdisc)
  {
         int prio;
         struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
  {
         struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
  
-       memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+       memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
         NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
         return skb->len;
  
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
         for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
                 skb_queue_head_init(band2list(priv, prio));
  
+       /* Can by-pass the queue discipline */
+       qdisc->flags |= TCQ_F_CAN_BYPASS;
         return 0;
  }
  
@@ -540,6 +543,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
         .dump           =       pfifo_fast_dump,
         .owner          =       THIS_MODULE,
  };
+EXPORT_SYMBOL(pfifo_fast_ops);
  
  struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                           struct Qdisc_ops *ops)
@@ -630,7 +634,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
  #ifdef CONFIG_NET_SCHED
         qdisc_list_del(qdisc);
  
-       qdisc_put_stab(qdisc->stab);
+       qdisc_put_stab(rtnl_dereference(qdisc->stab));
  #endif
         gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
         if (ops->reset)
@@ -674,25 +678,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
  
         return oqdisc;
  }
+EXPORT_SYMBOL(dev_graft_qdisc);
  
  static void attach_one_default_qdisc(struct net_device *dev,
                                      struct netdev_queue *dev_queue,
                                      void *_unused)
  {
-       struct Qdisc *qdisc;
+       struct Qdisc *qdisc = &noqueue_qdisc;
  
         if (dev->tx_queue_len) {
                 qdisc = qdisc_create_dflt(dev_queue,
                                           &pfifo_fast_ops, TC_H_ROOT);
                 if (!qdisc) {
-                       printk(KERN_INFO "%s: activation failed\n", dev->name);
+                       netdev_info(dev, "activation failed\n");
                         return;
                 }
-
-               /* Can by-pass the queue discipline for default qdisc */
-               qdisc->flags |= TCQ_F_CAN_BYPASS;
-       } else {
-               qdisc =  &noqueue_qdisc;
         }
         dev_queue->qdisc_sleeping = qdisc;
  }
@@ -761,6 +761,7 @@ void dev_activate(struct net_device *dev)
                 dev_watchdog_up(dev);
         }
  }
+EXPORT_SYMBOL(dev_activate);
  
  static void dev_deactivate_queue(struct net_device *dev,
                                  struct netdev_queue *dev_queue,
@@ -840,6 +841,7 @@ void dev_deactivate(struct net_device *dev)
         list_add(&dev->unreg_list, &single);
         dev_deactivate_many(&single);
  }
+EXPORT_SYMBOL(dev_deactivate);
  
  static void dev_init_scheduler_queue(struct net_device *dev,
                                      struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c

index 51dcc2a..b9493a0 100644 (file)
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
  struct gred_sched_data;
  struct gred_sched;
  
-struct gred_sched_data
-{
+struct gred_sched_data {
         u32             limit;          /* HARD maximal queue length    */
         u32             DP;             /* the drop pramaters */
         u32             bytesin;        /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
         GRED_RIO_MODE,
  };
  
-struct gred_sched
-{
+struct gred_sched {
         struct gred_sched_data *tab[MAX_DPs];
         unsigned long   flags;
         u32             red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
         return t->red_flags & TC_RED_HARDDROP;
  }
  
-static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
-       struct gred_sched_data *q=NULL;
-       struct gred_sched *t= qdisc_priv(sch);
+       struct gred_sched_data *q = NULL;
+       struct gred_sched *t = qdisc_priv(sch);
         unsigned long qavg = 0;
         u16 dp = tc_index_to_dp(skb);
  
-       if (dp >= t->DPs  || (q = t->tab[dp]) == NULL) {
+       if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                 dp = t->def;
  
-               if ((q = t->tab[dp]) == NULL) {
+               q = t->tab[dp];
+               if (!q) {
                         /* Pass through packets not assigned to a DP
                          * if no default DP has been configured. This
                          * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
                 for (i = 0; i < t->DPs; i++) {
                         if (t->tab[i] && t->tab[i]->prio < q->prio &&
                             !red_is_idling(&t->tab[i]->parms))
-                               qavg +=t->tab[i]->parms.qavg;
+                               qavg += t->tab[i]->parms.qavg;
                 }
  
         }
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
                 gred_store_wred_set(t, q);
  
         switch (red_action(&q->parms, q->parms.qavg + qavg)) {
-               case RED_DONT_MARK:
-                       break;
-
-               case RED_PROB_MARK:
-                       sch->qstats.overlimits++;
-                       if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
-                               q->stats.prob_drop++;
-                               goto congestion_drop;
-                       }
-
-                       q->stats.prob_mark++;
-                       break;
-
-               case RED_HARD_MARK:
-                       sch->qstats.overlimits++;
-                       if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
-                           !INET_ECN_set_ce(skb)) {
-                               q->stats.forced_drop++;
-                               goto congestion_drop;
-                       }
-                       q->stats.forced_mark++;
-                       break;
+       case RED_DONT_MARK:
+               break;
+
+       case RED_PROB_MARK:
+               sch->qstats.overlimits++;
+               if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+                       q->stats.prob_drop++;
+                       goto congestion_drop;
+               }
+
+               q->stats.prob_mark++;
+               break;
+
+       case RED_HARD_MARK:
+               sch->qstats.overlimits++;
+               if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+                   !INET_ECN_set_ce(skb)) {
+                       q->stats.forced_drop++;
+                       goto congestion_drop;
+               }
+               q->stats.forced_mark++;
+               break;
         }
  
         if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
         return NET_XMIT_CN;
  }
  
-static struct sk_buff *gred_dequeue(struct Qdisc* sch)
+static struct sk_buff *gred_dequeue(struct Qdisc *sch)
  {
         struct sk_buff *skb;
         struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
  
                 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                         if (net_ratelimit())
-                               printk(KERN_WARNING "GRED: Unable to relocate "
-                                      "VQ 0x%x after dequeue, screwing up "
-                                      "backlog.\n", tc_index_to_dp(skb));
+                               pr_warning("GRED: Unable to relocate VQ 0x%x "
+                                          "after dequeue, screwing up "
+                                          "backlog.\n", tc_index_to_dp(skb));
                 } else {
                         q->backlog -= qdisc_pkt_len(skb);
  
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
         return NULL;
  }
  
-static unsigned int gred_drop(struct Qdisc* sch)
+static unsigned int gred_drop(struct Qdisc *sch)
  {
         struct sk_buff *skb;
         struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
  
                 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                         if (net_ratelimit())
-                               printk(KERN_WARNING "GRED: Unable to relocate "
-                                      "VQ 0x%x while dropping, screwing up "
-                                      "backlog.\n", tc_index_to_dp(skb));
+                               pr_warning("GRED: Unable to relocate VQ 0x%x "
+                                          "while dropping, screwing up "
+                                          "backlog.\n", tc_index_to_dp(skb));
                 } else {
                         q->backlog -= len;
                         q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
  
  }
  
-static void gred_reset(struct Qdisc* sch)
+static void gred_reset(struct Qdisc *sch)
  {
         int i;
         struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
  
         for (i = table->DPs; i < MAX_DPs; i++) {
                 if (table->tab[i]) {
-                       printk(KERN_WARNING "GRED: Warning: Destroying "
-                              "shadowed VQ 0x%x\n", i);
+                       pr_warning("GRED: Warning: Destroying "
+                                  "shadowed VQ 0x%x\n", i);
                         gred_destroy_vq(table->tab[i]);
                         table->tab[i] = NULL;
                 }
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c

index 14a799d..6488e64 100644 (file)
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
   *   that are expensive on 32-bit architectures.
   */
  
-struct internal_sc
-{
+struct internal_sc {
         u64     sm1;    /* scaled slope of the 1st segment */
         u64     ism1;   /* scaled inverse-slope of the 1st segment */
         u64     dx;     /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
  };
  
  /* runtime service curve */
-struct runtime_sc
-{
+struct runtime_sc {
         u64     x;      /* current starting position on x-axis */
         u64     y;      /* current starting position on y-axis */
         u64     sm1;    /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
         u64     ism2;   /* scaled inverse-slope of the 2nd segment */
  };
  
-enum hfsc_class_flags
-{
+enum hfsc_class_flags {
         HFSC_RSC = 0x1,
         HFSC_FSC = 0x2,
         HFSC_USC = 0x4
  };
  
-struct hfsc_class
-{
+struct hfsc_class {
         struct Qdisc_class_common cl_common;
         unsigned int    refcnt;         /* usage count */
  
@@ -140,8 +136,8 @@ struct hfsc_class
         u64     cl_cumul;               /* cumulative work in bytes done by
                                            real-time criteria */
  
-       u64     cl_d;                   /* deadline*/
-       u64     cl_e;                   /* eligible time */
+       u64     cl_d;                   /* deadline*/
+       u64     cl_e;                   /* eligible time */
         u64     cl_vt;                  /* virtual time */
         u64     cl_f;                   /* time when this class will fit for
                                            link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
         unsigned long   cl_nactive;     /* number of active children */
  };
  
-struct hfsc_sched
-{
+struct hfsc_sched {
         u16     defcls;                         /* default class id */
         struct hfsc_class root;                 /* root class */
         struct Qdisc_class_hash clhash;         /* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
                 if (go_active) {
                         n = rb_last(&cl->cl_parent->vt_tree);
                         if (n != NULL) {
-                               max_cl = rb_entry(n, struct hfsc_class,vt_node);
+                               max_cl = rb_entry(n, struct hfsc_class, vt_node);
                                 /*
                                  * set vt to the average of the min and max
                                  * classes.  if the parent's period didn't
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                         return NULL;
                 }
  #endif
-               if ((cl = (struct hfsc_class *)res.class) == NULL) {
-                       if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+               cl = (struct hfsc_class *)res.class;
+               if (!cl) {
+                       cl = hfsc_find_class(res.classid, sch);
+                       if (!cl)
                                 break; /* filter selected invalid classid */
                         if (cl->level >= head->level)
                                 break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
         return -1;
  }
  
-static inline int
+static int
  hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
  {
         if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
         struct hfsc_class *cl;
         u64 next_time = 0;
  
-       if ((cl = eltree_get_minel(q)) != NULL)
+       cl = eltree_get_minel(q);
+       if (cl)
                 next_time = cl->cl_e;
         if (q->root.cl_cfmin != 0) {
                 if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1625,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
          * find the class with the minimum deadline among
          * the eligible classes.
          */
-       if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+       cl = eltree_get_mindl(q, cur_time);
+       if (cl) {
                 realtime = 1;
         } else {
                 /*
@@ -1664,7 +1663,7 @@ hfsc_dequeue(struct Qdisc *sch)
                 set_passive(cl);
         }
  
-       sch->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(sch);
         qdisc_bstats_update(sch, skb);
         sch->q.qlen--;
  
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c

index fc12fe6..e1429a8 100644 (file)
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
                         struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
                         struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
                         /* When class changes from state 1->2 and disconnects from
-                          parent's feed then we lost ptr value and start from the
-                          first child again. Here we store classid of the
-                          last valid ptr (used when ptr is NULL). */
+                        * parent's feed then we lost ptr value and start from the
+                        * first child again. Here we store classid of the
+                        * last valid ptr (used when ptr is NULL).
+                        */
                         u32 last_ptr_id[TC_HTB_NUMPRIO];
                 } inner;
         } un;
@@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
   * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
   * then finish and return direct queue.
   */
-#define HTB_DIRECT (struct htb_class*)-1
+#define HTB_DIRECT ((struct htb_class *)-1L)
  
  static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
                                       int *qerr)
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
         int result;
  
         /* allow to select class by setting skb->priority to valid classid;
-          note that nfmark can be used too by attaching filter fw with no
-          rules in it */
+        * note that nfmark can be used too by attaching filter fw with no
+        * rules in it
+        */
         if (skb->priority == sch->handle)
                 return HTB_DIRECT;      /* X:0 (direct flow) selected */
-       if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
+       cl = htb_find(skb->priority, sch);
+       if (cl && cl->level == 0)
                 return cl;
  
         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
                         return NULL;
                 }
  #endif
-               if ((cl = (void *)res.class) == NULL) {
+               cl = (void *)res.class;
+               if (!cl) {
                         if (res.classid == sch->handle)
                                 return HTB_DIRECT;      /* X:0 (direct flow) */
-                       if ((cl = htb_find(res.classid, sch)) == NULL)
+                       cl = htb_find(res.classid, sch);
+                       if (!cl)
                                 break;  /* filter selected invalid classid */
                 }
                 if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
  
                         if (p->un.inner.feed[prio].rb_node)
                                 /* parent already has its feed in use so that
-                                  reset bit in mask as parent is already ok */
+                                * reset bit in mask as parent is already ok
+                                */
                                 mask &= ~(1 << prio);
  
                         htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
  
                         if (p->un.inner.ptr[prio] == cl->node + prio) {
                                 /* we are removing child which is pointed to from
-                                  parent feed - forget the pointer but remember
-                                  classid */
+                                * parent feed - forget the pointer but remember
+                                * classid
+                                */
                                 p->un.inner.last_ptr_id[prio] = cl->common.classid;
                                 p->un.inner.ptr[prio] = NULL;
                         }
@@ -663,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
                                    unsigned long start)
  {
         /* don't run for longer than 2 jiffies; 2 is used instead of
-          1 to simplify things when jiffy is going to be incremented
-          too soon */
+        * 1 to simplify things when jiffy is going to be incremented
+        * too soon
+        */
         unsigned long stop_at = start + 2;
         while (time_before(jiffies, stop_at)) {
                 struct htb_class *cl;
@@ -687,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
  
         /* too much load - let's continue after a break for scheduling */
         if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
-               printk(KERN_WARNING "htb: too many events!\n");
+               pr_warning("htb: too many events!\n");
                 q->warned |= HTB_WARN_TOOMANYEVENTS;
         }
  
@@ -695,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
  }
  
  /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
-   is no such one exists. */
+ * is no such one exists.
+ */
  static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
                                               u32 id)
  {
@@ -739,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
         for (i = 0; i < 65535; i++) {
                 if (!*sp->pptr && *sp->pid) {
                         /* ptr was invalidated but id is valid - try to recover
-                          the original or next ptr */
+                        * the original or next ptr
+                        */
                         *sp->pptr =
                             htb_id_find_next_upper(prio, sp->root, *sp->pid);
                 }
                 *sp->pid = 0;   /* ptr is valid now so that remove this hint as it
-                                  can become out of date quickly */
+                                * can become out of date quickly
+                                */
                 if (!*sp->pptr) {       /* we are at right end; rewind & go up */
                         *sp->pptr = sp->root;
                         while ((*sp->pptr)->rb_left)
@@ -772,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
  }
  
  /* dequeues packet at given priority and level; call only if
-   you are sure that there is active class at prio/level */
+ * you are sure that there is active class at prio/level
+ */
  static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
                                         int level)
  {
@@ -789,9 +801,10 @@ next:
                         return NULL;
  
                 /* class can be empty - it is unlikely but can be true if leaf
-                  qdisc drops packets in enqueue routine or if someone used
-                  graft operation on the leaf since last dequeue;
-                  simply deactivate and skip such class */
+                * qdisc drops packets in enqueue routine or if someone used
+                * graft operation on the leaf since last dequeue;
+                * simply deactivate and skip such class
+                */
                 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
                         struct htb_class *next;
                         htb_deactivate(q, cl);
@@ -831,7 +844,8 @@ next:
                                           ptr[0]) + prio);
                 }
                 /* this used to be after charge_class but this constelation
-                  gives us slightly better performance */
+                * gives us slightly better performance
+                */
                 if (!cl->un.leaf.q->q.qlen)
                         htb_deactivate(q, cl);
                 htb_charge_class(q, cl, level, skb);
@@ -852,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
         if (skb != NULL) {
  ok:
                 qdisc_bstats_update(sch, skb);
-               sch->flags &= ~TCQ_F_THROTTLED;
+               qdisc_unthrottled(sch);
                 sch->q.qlen--;
                 return skb;
         }
@@ -883,6 +897,7 @@ ok:
                 m = ~q->row_mask[level];
                 while (m != (int)(-1)) {
                         int prio = ffz(m);
+
                         m |= 1 << prio;
                         skb = htb_dequeue_tree(q, prio, level);
                         if (likely(skb != NULL))
@@ -987,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
                 return err;
  
         if (tb[TCA_HTB_INIT] == NULL) {
-               printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+               pr_err("HTB: hey probably you have bad tc tool ?\n");
                 return -EINVAL;
         }
         gopt = nla_data(tb[TCA_HTB_INIT]);
         if (gopt->version != HTB_VER >> 16) {
-               printk(KERN_ERR
-                      "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+               pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
                        HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
                 return -EINVAL;
         }
@@ -1206,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
         cancel_work_sync(&q->work);
         qdisc_watchdog_cancel(&q->watchdog);
         /* This line used to be after htb_destroy_class call below
-          and surprisingly it worked in 2.4. But it must precede it
-          because filter need its target class alive to be able to call
-          unbind_filter on it (without Oops). */
+        * and surprisingly it worked in 2.4. But it must precede it
+        * because filter need its target class alive to be able to call
+        * unbind_filter on it (without Oops).
+        */
         tcf_destroy_chain(&q->filter_list);
  
         for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1342,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
  
                 /* check maximal depth */
                 if (parent && parent->parent && parent->parent->level < 2) {
-                       printk(KERN_ERR "htb: tree is too deep\n");
+                       pr_err("htb: tree is too deep\n");
                         goto failure;
                 }
                 err = -ENOBUFS;
-               if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+               cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+               if (!cl)
                         goto failure;
  
                 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1366,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                         RB_CLEAR_NODE(&cl->node[prio]);
  
                 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
-                  so that can't be used inside of sch_tree_lock
-                  -- thanks to Karlis Peisenieks */
+                * so that can't be used inside of sch_tree_lock
+                * -- thanks to Karlis Peisenieks
+                */
                 new_q = qdisc_create_dflt(sch->dev_queue,
                                           &pfifo_qdisc_ops, classid);
                 sch_tree_lock(sch);
@@ -1419,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
         }
  
         /* it used to be a nasty bug here, we have to check that node
-          is really leaf before changing cl->un.leaf ! */
+        * is really leaf before changing cl->un.leaf !
+        */
         if (!cl->level) {
                 cl->quantum = rtab->rate.rate / q->rate2quantum;
                 if (!hopt->quantum && cl->quantum < 1000) {
-                       printk(KERN_WARNING
+                       pr_warning(
                                "HTB: quantum of class %X is small. Consider r2q change.\n",
                                cl->common.classid);
                         cl->quantum = 1000;
                 }
                 if (!hopt->quantum && cl->quantum > 200000) {
-                       printk(KERN_WARNING
+                       pr_warning(
                                "HTB: quantum of class %X is big. Consider r2q change.\n",
                                cl->common.classid);
                         cl->quantum = 200000;
@@ -1478,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
         struct htb_class *cl = htb_find(classid, sch);
  
         /*if (cl && !cl->level) return 0;
-          The line above used to be there to prevent attaching filters to
-          leaves. But at least tc_index filter uses this just to get class
-          for other reasons so that we have to allow for it.
-          ----
-          19.6.2002 As Werner explained it is ok - bind filter is just
-          another way to "lock" the class - unlike "get" this lock can
-          be broken by class during destroy IIUC.
+        * The line above used to be there to prevent attaching filters to
+        * leaves. But at least tc_index filter uses this just to get class
+        * for other reasons so that we have to allow for it.
+        * ----
+        * 19.6.2002 As Werner explained it is ok - bind filter is just
+        * another way to "lock" the class - unlike "get" this lock can
+        * be broken by class during destroy IIUC.
          */
         if (cl)
                 cl->filter_cnt++;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c

index ecc302f..ec5cbc8 100644 (file)
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
                                                     TC_H_MIN(ntx + 1)));
                 if (qdisc == NULL)
                         goto err;
-               qdisc->flags |= TCQ_F_CAN_BYPASS;
                 priv->qdiscs[ntx] = qdisc;
         }
  
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c

new file mode 100644 (file)

index 0000000..fbc6f53
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,416 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+/* Private state kept by an mqprio root qdisc (see qdisc_priv() users). */
+struct mqprio_sched {
+       /* Array of child qdiscs, one slot per tx queue; allocated in
+        * mqprio_init() and freed (pointer set to NULL) in mqprio_attach().
+        */
+       struct Qdisc            **qdiscs;
+       /* Set to 1 in mqprio_init() when the options request hardware
+        * ownership of the queue mapping (qopt->hw).
+        */
+       int hw_owned;
+};
+
+/*
+ * mqprio_destroy - release the resources held by an mqprio root qdisc.
+ * @sch: the mqprio qdisc being torn down
+ *
+ * Destroys any child qdiscs still parked in priv->qdiscs and frees that
+ * array, then clears the device's traffic class configuration: through
+ * ndo_setup_tc(dev, 0) when the mapping is hardware owned and the driver
+ * provides the hook, otherwise through netdev_set_num_tc(dev, 0).
+ */
+static void mqprio_destroy(struct Qdisc *sch)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       unsigned int ntx;
+
+       /* mqprio_attach() frees the array and NULLs the pointer, so a NULL
+        * pointer only means there are no parked children left; the tc
+        * state configured by mqprio_init() must still be undone below.
+        */
+       if (priv->qdiscs) {
+               /* The array is zero-allocated and filled in order, so the
+                * first NULL slot marks the end of a partial init.
+                */
+               for (ntx = 0;
+                    ntx < dev->num_tx_queues && priv->qdiscs[ntx];
+                    ntx++)
+                       qdisc_destroy(priv->qdiscs[ntx]);
+
+               kfree(priv->qdiscs);
+               /* Do not leave a dangling pointer behind */
+               priv->qdiscs = NULL;
+       }
+
+       if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+               dev->netdev_ops->ndo_setup_tc(dev, 0);
+       else
+               netdev_set_num_tc(dev, 0);
+}
+
+/*
+ * mqprio_parse_opt - sanity check a user supplied mqprio configuration.
+ * @dev: device the qdisc is being set up on
+ * @qopt: requested traffic class layout
+ *
+ * Returns 0 when the request is acceptable and -EINVAL otherwise.
+ */
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+       int prio, tc, next;
+
+       /* The number of traffic classes is capped at TC_MAX_QUEUE */
+       if (qopt->num_tc > TC_MAX_QUEUE)
+               return -EINVAL;
+
+       /* Every priority has to point at one of the requested classes */
+       for (prio = 0; prio <= TC_BITMASK; prio++)
+               if (qopt->prio_tc_map[prio] >= qopt->num_tc)
+                       return -EINVAL;
+
+       if (qopt->hw) {
+               /* Hardware ownership needs the driver hook.  Queue counts
+                * and offsets are then taken from the LLD, so there is
+                * nothing further to verify here.
+                */
+               return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
+       }
+
+       for (tc = 0; tc < qopt->num_tc; tc++) {
+               unsigned int first = qopt->offset[tc];
+               unsigned int end = first + qopt->count[tc];
+
+               /* Each class needs at least one queue and its range
+                * [first, end) has to fit in the real tx queues; end being
+                * equal to real_num_tx_queues means the last queue is used.
+                */
+               if (!qopt->count[tc] ||
+                   first >= dev->real_num_tx_queues ||
+                   end > dev->real_num_tx_queues)
+                       return -EINVAL;
+
+               /* No later class may start before this range ends */
+               for (next = tc + 1; next < qopt->num_tc; next++)
+                       if (qopt->offset[next] < end)
+                               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * mqprio_init - set up an mqprio root qdisc.
+ * @sch: qdisc being initialised
+ * @opt: netlink attribute carrying a struct tc_mqprio_qopt
+ *
+ * Returns 0 on success; -EOPNOTSUPP when @sch is not the root qdisc or the
+ * device is not multiqueue; -EINVAL when @opt is missing, too short or
+ * rejected by mqprio_parse_opt(); -ENOMEM on allocation failure; or the
+ * error returned by the driver's ndo_setup_tc().
+ */
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       struct netdev_queue *dev_queue;
+       struct Qdisc *qdisc;
+       int i, err = -EOPNOTSUPP;
+       struct tc_mqprio_qopt *qopt = NULL;
+
+       /* The in-kernel limits must match the ones in the option struct */
+       BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
+       BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
+
+       if (sch->parent != TC_H_ROOT)
+               return -EOPNOTSUPP;
+
+       if (!netif_is_multiqueue(dev))
+               return -EOPNOTSUPP;
+
+       /* Options are mandatory: reject a missing attribute instead of
+        * dereferencing a NULL pointer in nla_len().
+        */
+       if (!opt || nla_len(opt) < sizeof(*qopt))
+               return -EINVAL;
+
+       qopt = nla_data(opt);
+       if (mqprio_parse_opt(dev, qopt))
+               return -EINVAL;
+
+       /* pre-allocate qdisc, attachment can't fail */
+       priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+                              GFP_KERNEL);
+       if (priv->qdiscs == NULL) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       /* One pfifo_fast child per tx queue, with minor numbers 1..n */
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               dev_queue = netdev_get_tx_queue(dev, i);
+               qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+                                         TC_H_MAKE(TC_H_MAJ(sch->handle),
+                                                   TC_H_MIN(i + 1)));
+               if (qdisc == NULL) {
+                       err = -ENOMEM;
+                       goto err;
+               }
+               priv->qdiscs[i] = qdisc;
+       }
+
+       /* If the mqprio options indicate that hardware should own
+        * the queue mapping then run ndo_setup_tc otherwise use the
+        * supplied and verified mapping
+        */
+       if (qopt->hw) {
+               priv->hw_owned = 1;
+               err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+               if (err)
+                       goto err;
+       } else {
+               netdev_set_num_tc(dev, qopt->num_tc);
+               for (i = 0; i < qopt->num_tc; i++)
+                       netdev_set_tc_queue(dev, i,
+                                           qopt->count[i], qopt->offset[i]);
+       }
+
+       /* Always use supplied priority mappings */
+       for (i = 0; i < TC_BITMASK + 1; i++)
+               netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
+
+       sch->flags |= TCQ_F_MQROOT;
+       return 0;
+
+err:
+       /* Undo whatever was set up so far */
+       mqprio_destroy(sch);
+       return err;
+}
+
+/*
+ * mqprio_attach - graft the pre-built child qdiscs onto their tx queues.
+ * @sch: the mqprio root qdisc
+ *
+ * Whatever qdisc dev_graft_qdisc() hands back for a queue is destroyed.
+ * The staging array is freed and its pointer cleared afterwards.
+ */
+static void mqprio_attach(struct Qdisc *sch)
+{
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned int q;
+
+       for (q = 0; q < dev->num_tx_queues; q++) {
+               struct Qdisc *child = priv->qdiscs[q];
+               struct Qdisc *prev = dev_graft_qdisc(child->dev_queue, child);
+
+               if (prev)
+                       qdisc_destroy(prev);
+       }
+
+       kfree(priv->qdiscs);
+       priv->qdiscs = NULL;
+}
+
+/*
+ * mqprio_queue_get - map a class identifier to its tx queue.
+ * @sch: the mqprio root qdisc
+ * @cl: class identifier
+ *
+ * Queue classes are numbered after the traffic classes, so the queue
+ * index is cl - 1 - num_tc.  The arithmetic is unsigned: an identifier
+ * that is not above num_tc wraps to a huge index and fails the range
+ * check.  Returns the tx queue, or NULL when @cl does not name one.
+ */
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+                                            unsigned long cl)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned long idx = cl - 1 - netdev_get_num_tc(dev);
+
+       if (idx < dev->num_tx_queues)
+               return netdev_get_tx_queue(dev, idx);
+
+       return NULL;
+}
+
+/*
+ * mqprio_graft - replace the qdisc attached to one tx queue class.
+ * @sch: the mqprio root qdisc
+ * @cl: class identifier of the tx queue
+ * @new: qdisc to install
+ * @old: receives the qdisc returned by dev_graft_qdisc()
+ *
+ * Returns 0 on success, -EINVAL when @cl does not map to a tx queue.
+ */
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+                   struct Qdisc **old)
+{
+       struct netdev_queue *txq = mqprio_queue_get(sch, cl);
+       struct net_device *dev = qdisc_dev(sch);
+
+       if (txq == NULL)
+               return -EINVAL;
+
+       /* A device that is up is deactivated around the swap */
+       if (dev->flags & IFF_UP)
+               dev_deactivate(dev);
+
+       *old = dev_graft_qdisc(txq, new);
+
+       if (dev->flags & IFF_UP)
+               dev_activate(dev);
+
+       return 0;
+}
+
+/*
+ * mqprio_dump - report the mqprio configuration and aggregate statistics.
+ * @sch: the mqprio root qdisc
+ * @skb: netlink message being built
+ *
+ * Recomputes the root's counters as the sum over the qdiscs currently
+ * attached to every tx queue, then emits a TCA_OPTIONS attribute holding
+ * the device's current traffic class layout.
+ *
+ * Returns skb->len on success, or -1 after trimming @skb back to where it
+ * was on entry when the attribute could not be added.
+ */
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       unsigned char *b = skb_tail_pointer(skb);
+       /* Zero the whole struct: only the first num_tc entries of count[]
+        * and offset[] are filled in below, and the struct is copied out
+        * in full, so stale stack contents must not leak to user space.
+        */
+       struct tc_mqprio_qopt opt = { 0 };
+       struct Qdisc *qdisc;
+       unsigned int i;
+
+       sch->q.qlen = 0;
+       memset(&sch->bstats, 0, sizeof(sch->bstats));
+       memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+       /* Sum the counters of each queue's qdisc under that qdisc's lock */
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+               spin_lock_bh(qdisc_lock(qdisc));
+               sch->q.qlen             += qdisc->q.qlen;
+               sch->bstats.bytes       += qdisc->bstats.bytes;
+               sch->bstats.packets     += qdisc->bstats.packets;
+               sch->qstats.qlen        += qdisc->qstats.qlen;
+               sch->qstats.backlog     += qdisc->qstats.backlog;
+               sch->qstats.drops       += qdisc->qstats.drops;
+               sch->qstats.requeues    += qdisc->qstats.requeues;
+               sch->qstats.overlimits  += qdisc->qstats.overlimits;
+               spin_unlock_bh(qdisc_lock(qdisc));
+       }
+
+       opt.num_tc = netdev_get_num_tc(dev);
+       memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+       opt.hw = priv->hw_owned;
+
+       for (i = 0; i < netdev_get_num_tc(dev); i++) {
+               opt.count[i] = dev->tc_to_txq[i].count;
+               opt.offset[i] = dev->tc_to_txq[i].offset;
+       }
+
+       NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+       return skb->len;
+nla_put_failure:
+       nlmsg_trim(skb, b);
+       return -1;
+}
+
+/*
+ * mqprio_leaf - return the qdisc behind a tx queue class.
+ * @sch: the mqprio root qdisc
+ * @cl: class identifier
+ *
+ * Returns the queue's qdisc_sleeping, or NULL when @cl does not map to a
+ * tx queue.
+ */
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+       struct netdev_queue *txq = mqprio_queue_get(sch, cl);
+
+       return txq ? txq->qdisc_sleeping : NULL;
+}
+
+/*
+ * mqprio_get - look up a class by classid.
+ * @sch: the mqprio root qdisc
+ * @classid: class handle; only its minor part is used
+ *
+ * Valid minors run from 1 to num_tx_queues + num_tc.  Returns the minor
+ * as the class identifier, or 0 when it is above that range.
+ */
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned int minor = TC_H_MIN(classid);
+       unsigned int limit = dev->num_tx_queues + netdev_get_num_tc(dev);
+
+       return minor <= limit ? minor : 0;
+}
+
+/* Counterpart of mqprio_get(); intentionally empty since mqprio_get()
+ * only computes an identifier and takes no reference.
+ */
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+/*
+ * mqprio_dump_class - fill in the tcmsg for one mqprio class.
+ * @sch: the mqprio root qdisc
+ * @cl: class identifier (1-based)
+ * @skb: dump buffer (not used here)
+ * @tcm: message header to fill in
+ *
+ * Identifiers up to the number of traffic classes describe the traffic
+ * classes themselves and hang off the root.  Larger identifiers stand for
+ * individual tx queues, whose parent is the traffic class covering the
+ * queue (or 0 when no class covers it).  Always returns 0.
+ */
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+                        struct sk_buff *skb, struct tcmsg *tcm)
+{
+       struct net_device *dev = qdisc_dev(sch);
+
+       if (cl > netdev_get_num_tc(dev)) {
+               struct netdev_queue *txq = mqprio_queue_get(sch, cl);
+               /* 1-based position of the queue among the tx queues */
+               int q_idx = cl - netdev_get_num_tc(dev);
+               int tc;
+
+               tcm->tcm_parent = 0;
+               for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
+                       struct netdev_tc_txq range = dev->tc_to_txq[tc];
+
+                       /* Skip classes whose queue range excludes q_idx */
+                       if (q_idx <= range.offset ||
+                           q_idx > range.offset + range.count)
+                               continue;
+
+                       tcm->tcm_parent = TC_H_MAKE(TC_H_MAJ(sch->handle),
+                                                   TC_H_MIN(tc + 1));
+                       break;
+               }
+               tcm->tcm_info = txq->qdisc_sleeping->handle;
+       } else {
+               tcm->tcm_parent = TC_H_ROOT;
+               tcm->tcm_info = 0;
+       }
+       tcm->tcm_handle |= TC_H_MIN(cl);
+       return 0;
+}
+
+/*
+ * mqprio_dump_class_stats - copy the statistics of one class into a dump.
+ * @sch: the mqprio root qdisc
+ * @cl: class identifier
+ * @d: dump context; d->lock is held on entry and on return
+ *
+ * For a traffic class (cl <= num_tc) the counters of every queue in the
+ * class's offset/count range are summed.  For a queue class the counters
+ * of that queue's qdisc_sleeping are reported.
+ *
+ * Returns 0 on success, -1 when a gnet_stats_copy_*() call fails.
+ */
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+                              struct gnet_dump *d)
+{
+       struct net_device *dev = qdisc_dev(sch);
+
+       if (cl <= netdev_get_num_tc(dev)) {
+               int i;
+               struct Qdisc *qdisc;
+               struct gnet_stats_queue qstats = {0};
+               struct gnet_stats_basic_packed bstats = {0};
+               struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+
+               /* Drop lock here it will be reclaimed before touching
+                * statistics this is required because the d->lock we
+                * hold here is the lock on dev_queue->qdisc_sleeping
+                * also acquired below.
+                */
+               spin_unlock_bh(d->lock);
+
+               /* Accumulate per-queue counters, each under its own lock */
+               for (i = tc.offset; i < tc.offset + tc.count; i++) {
+                       qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+                       spin_lock_bh(qdisc_lock(qdisc));
+                       bstats.bytes      += qdisc->bstats.bytes;
+                       bstats.packets    += qdisc->bstats.packets;
+                       qstats.qlen       += qdisc->qstats.qlen;
+                       qstats.backlog    += qdisc->qstats.backlog;
+                       qstats.drops      += qdisc->qstats.drops;
+                       qstats.requeues   += qdisc->qstats.requeues;
+                       qstats.overlimits += qdisc->qstats.overlimits;
+                       spin_unlock_bh(qdisc_lock(qdisc));
+               }
+               /* Reclaim root sleeping lock before completing stats */
+               spin_lock_bh(d->lock);
+               if (gnet_stats_copy_basic(d, &bstats) < 0 ||
+                   gnet_stats_copy_queue(d, &qstats) < 0)
+                       return -1;
+       } else {
+               /* NOTE(review): dev_queue is dereferenced without a NULL
+                * check; presumably cl was already validated by
+                * mqprio_get() - confirm against callers.
+                */
+               struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+               sch = dev_queue->qdisc_sleeping;
+               /* Refresh the reported queue length from the live count */
+               sch->qstats.qlen = sch->q.qlen;
+               if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+                   gnet_stats_copy_queue(d, &sch->qstats) < 0)
+                       return -1;
+       }
+       return 0;
+}
+
+/*
+ * mqprio_walk - iterate over all classes of the qdisc.
+ * @sch: the mqprio root qdisc
+ * @arg: walker state (skip count, running count, callback, stop flag)
+ *
+ * Visits one virtual class per traffic class followed by one class per
+ * tx queue, starting after the first arg->skip entries.  The walk ends
+ * early, with arg->stop set, as soon as the callback returns a negative
+ * value.
+ */
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned long idx;
+
+       if (arg->stop)
+               return;
+
+       arg->count = arg->skip;
+       idx = arg->skip;
+       while (idx < dev->num_tx_queues + netdev_get_num_tc(dev)) {
+               /* Class identifiers are 1-based */
+               if (arg->fn(sch, idx + 1, arg) < 0) {
+                       arg->stop = 1;
+                       return;
+               }
+               arg->count++;
+               idx++;
+       }
+}
+
+/* Class operations for mqprio, wired to the mqprio_* handlers above. */
+static const struct Qdisc_class_ops mqprio_class_ops = {
+       .graft          = mqprio_graft,
+       .leaf           = mqprio_leaf,
+       .get            = mqprio_get,
+       .put            = mqprio_put,
+       .walk           = mqprio_walk,
+       .dump           = mqprio_dump_class,
+       .dump_stats     = mqprio_dump_class_stats,
+};
+
+/* Qdisc operations for the "mqprio" qdisc, registered at module load.
+ * NOTE(review): this symbol has external linkage; no user outside this
+ * file is visible here - confirm whether it could be made static.
+ */
+struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+       .cl_ops         = &mqprio_class_ops,
+       .id             = "mqprio",
+       .priv_size      = sizeof(struct mqprio_sched),
+       .init           = mqprio_init,
+       .destroy        = mqprio_destroy,
+       .attach         = mqprio_attach,
+       .dump           = mqprio_dump,
+       .owner          = THIS_MODULE,
+};
+
+/* Module entry point: registers mqprio_qdisc_ops and returns the result
+ * of register_qdisc().
+ */
+static int __init mqprio_module_init(void)
+{
+       return register_qdisc(&mqprio_qdisc_ops);
+}
+
+/* Module exit point: unregisters mqprio_qdisc_ops. */
+static void __exit mqprio_module_exit(void)
+{
+       unregister_qdisc(&mqprio_qdisc_ops);
+}
+
+/* Hook the init/exit handlers into module load and unload. */
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c

index 436a2e7..edc1950 100644 (file)
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
         unsigned int len;
         struct Qdisc *qdisc;
  
-       for (band = q->bands-1; band >= 0; band--) {
+       for (band = q->bands - 1; band >= 0; band--) {
                 qdisc = q->queues[band];
                 if (qdisc->ops->drop) {
                         len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
         for (i = 0; i < q->max_bands; i++)
                 q->queues[i] = &noop_qdisc;
  
-       err = multiq_tune(sch,opt);
+       err = multiq_tune(sch, opt);
  
         if (err)
                 kfree(q->queues);
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
         struct multiq_sched_data *q = qdisc_priv(sch);
  
         tcm->tcm_handle |= TC_H_MIN(cl);
-       tcm->tcm_info = q->queues[cl-1]->handle;
+       tcm->tcm_info = q->queues[cl - 1]->handle;
         return 0;
  }
  
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
                         arg->count++;
                         continue;
                 }
-               if (arg->fn(sch, band+1, arg) < 0) {
+               if (arg->fn(sch, band + 1, arg) < 0) {
                         arg->stop = 1;
                         break;
                 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c

index 6a3006b..64f0d32 100644 (file)
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -211,8 +211,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
         }
  
         cb = netem_skb_cb(skb);
-       if (q->gap == 0 ||              /* not doing reordering */
-           q->counter < q->gap ||      /* inside last reordering gap */
+       if (q->gap == 0 ||              /* not doing reordering */
+           q->counter < q->gap ||      /* inside last reordering gap */
             q->reorder < get_crandom(&q->reorder_cor)) {
                 psched_time_t now;
                 psched_tdiff_t delay;
@@ -248,7 +248,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
         return ret;
  }
  
-static unsigned int netem_drop(struct Qdisc* sch)
+static unsigned int netem_drop(struct Qdisc *sch)
  {
         struct netem_sched_data *q = qdisc_priv(sch);
         unsigned int len = 0;
@@ -265,7 +265,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
         struct netem_sched_data *q = qdisc_priv(sch);
         struct sk_buff *skb;
  
-       if (sch->flags & TCQ_F_THROTTLED)
+       if (qdisc_is_throttled(sch))
                 return NULL;
  
         skb = q->qdisc->ops->peek(q->qdisc);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c

index fbd710d..2a318f2 100644 (file)
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
  #include <net/pkt_sched.h>
  
  
-struct prio_sched_data
-{
+struct prio_sched_data {
         int bands;
         struct tcf_proto *filter_list;
         u8  prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                 if (!q->filter_list || err < 0) {
                         if (TC_H_MAJ(band))
                                 band = 0;
-                       return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+                       return q->queues[q->prio2band[band & TC_PRIO_MAX]];
                 }
                 band = res.classid;
         }
@@ -106,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
         return NULL;
  }
  
-static struct sk_buff *prio_dequeue(struct Qdisc* sch)
+static struct sk_buff *prio_dequeue(struct Qdisc *sch)
  {
         struct prio_sched_data *q = qdisc_priv(sch);
         int prio;
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
  
  }
  
-static unsigned int prio_drop(struct Qdisc* sch)
+static unsigned int prio_drop(struct Qdisc *sch)
  {
         struct prio_sched_data *q = qdisc_priv(sch);
         int prio;
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
  
  
  static void
-prio_reset(struct Qdisc* sch)
+prio_reset(struct Qdisc *sch)
  {
         int prio;
         struct prio_sched_data *q = qdisc_priv(sch);
  
-       for (prio=0; prio<q->bands; prio++)
+       for (prio = 0; prio < q->bands; prio++)
                 qdisc_reset(q->queues[prio]);
         sch->q.qlen = 0;
  }
  
  static void
-prio_destroy(struct Qdisc* sch)
+prio_destroy(struct Qdisc *sch)
  {
         int prio;
         struct prio_sched_data *q = qdisc_priv(sch);
  
         tcf_destroy_chain(&q->filter_list);
-       for (prio=0; prio<q->bands; prio++)
+       for (prio = 0; prio < q->bands; prio++)
                 qdisc_destroy(q->queues[prio]);
  }
  
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
         if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
                 return -EINVAL;
  
-       for (i=0; i<=TC_PRIO_MAX; i++) {
+       for (i = 0; i <= TC_PRIO_MAX; i++) {
                 if (qopt->priomap[i] >= qopt->bands)
                         return -EINVAL;
         }
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
         q->bands = qopt->bands;
         memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
  
-       for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+       for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
                 struct Qdisc *child = q->queues[i];
                 q->queues[i] = &noop_qdisc;
                 if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
         }
         sch_tree_unlock(sch);
  
-       for (i=0; i<q->bands; i++) {
+       for (i = 0; i < q->bands; i++) {
                 if (q->queues[i] == &noop_qdisc) {
                         struct Qdisc *child, *old;
+
                         child = qdisc_create_dflt(sch->dev_queue,
                                                   &pfifo_qdisc_ops,
                                                   TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
         struct prio_sched_data *q = qdisc_priv(sch);
         int i;
  
-       for (i=0; i<TCQ_PRIO_BANDS; i++)
+       for (i = 0; i < TCQ_PRIO_BANDS; i++)
                 q->queues[i] = &noop_qdisc;
  
         if (opt == NULL) {
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
         } else {
                 int err;
  
-               if ((err= prio_tune(sch, opt)) != 0)
+               if ((err = prio_tune(sch, opt)) != 0)
                         return err;
         }
         return 0;
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
         struct tc_prio_qopt opt;
  
         opt.bands = q->bands;
-       memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+       memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
  
         NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
  
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
                         arg->count++;
                         continue;
                 }
-               if (arg->fn(sch, prio+1, arg) < 0) {
+               if (arg->fn(sch, prio + 1, arg) < 0) {
                         arg->stop = 1;
                         break;
                 }
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
         }
  }
  
-static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
  {
         struct prio_sched_data *q = qdisc_priv(sch);
  
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c

index 9f98dbd..6649463 100644 (file)
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
         if RED works correctly.
   */
  
-struct red_sched_data
-{
+struct red_sched_data {
         u32                     limit;          /* HARD maximal queue length */
         unsigned char           flags;
         struct red_parms        parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
         return q->flags & TC_RED_HARDDROP;
  }
  
-static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
         struct red_sched_data *q = qdisc_priv(sch);
         struct Qdisc *child = q->qdisc;
@@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
                 red_end_of_idle_period(&q->parms);
  
         switch (red_action(&q->parms, q->parms.qavg)) {
-               case RED_DONT_MARK:
-                       break;
-
-               case RED_PROB_MARK:
-                       sch->qstats.overlimits++;
-                       if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
-                               q->stats.prob_drop++;
-                               goto congestion_drop;
-                       }
-
-                       q->stats.prob_mark++;
-                       break;
-
-               case RED_HARD_MARK:
-                       sch->qstats.overlimits++;
-                       if (red_use_harddrop(q) || !red_use_ecn(q) ||
-                           !INET_ECN_set_ce(skb)) {
-                               q->stats.forced_drop++;
-                               goto congestion_drop;
-                       }
-
-                       q->stats.forced_mark++;
-                       break;
+       case RED_DONT_MARK:
+               break;
+
+       case RED_PROB_MARK:
+               sch->qstats.overlimits++;
+               if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+                       q->stats.prob_drop++;
+                       goto congestion_drop;
+               }
+
+               q->stats.prob_mark++;
+               break;
+
+       case RED_HARD_MARK:
+               sch->qstats.overlimits++;
+               if (red_use_harddrop(q) || !red_use_ecn(q) ||
+                   !INET_ECN_set_ce(skb)) {
+                       q->stats.forced_drop++;
+                       goto congestion_drop;
+               }
+
+               q->stats.forced_mark++;
+               break;
         }
  
         ret = qdisc_enqueue(skb, child);
@@ -106,7 +105,7 @@ congestion_drop:
         return NET_XMIT_CN;
  }
  
-static struct sk_buff * red_dequeue(struct Qdisc* sch)
+static struct sk_buff *red_dequeue(struct Qdisc *sch)
  {
         struct sk_buff *skb;
         struct red_sched_data *q = qdisc_priv(sch);
@@ -123,7 +122,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
         return skb;
  }
  
-static struct sk_buff * red_peek(struct Qdisc* sch)
+static struct sk_buff *red_peek(struct Qdisc *sch)
  {
         struct red_sched_data *q = qdisc_priv(sch);
         struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
         return child->ops->peek(child);
  }
  
-static unsigned int red_drop(struct Qdisc* sch)
+static unsigned int red_drop(struct Qdisc *sch)
  {
         struct red_sched_data *q = qdisc_priv(sch);
         struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
         return 0;
  }
  
-static void red_reset(struct Qdisc* sch)
+static void red_reset(struct Qdisc *sch)
  {
         struct red_sched_data *q = qdisc_priv(sch);
  
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
         return 0;
  }
  
-static int red_init(struct Qdisc* sch, struct nlattr *opt)
+static int red_init(struct Qdisc *sch, struct nlattr *opt)
  {
         struct red_sched_data *q = qdisc_priv(sch);
  
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c

index edea8ce..4cff442 100644 (file)
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
  #include <linux/skbuff.h>
  #include <linux/jhash.h>
  #include <linux/slab.h>
+#include <linux/vmalloc.h>
  #include <net/ip.h>
  #include <net/netlink.h>
  #include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
  #define SFQ_DEPTH              128 /* max number of packets per flow */
  #define SFQ_SLOTS              128 /* max number of flows */
  #define SFQ_EMPTY_SLOT         255
-#define SFQ_HASH_DIVISOR       1024
+#define SFQ_DEFAULT_HASH_DIVISOR 1024
+
  /* We use 16 bits to store allot, and want to handle packets up to 64K
   * Scale allot by 8 (1<<3) so that no overflow occurs.
   */
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index;
   * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
   * are 'pointers' to dep[] array
   */
-struct sfq_head
-{
+struct sfq_head {
         sfq_index       next;
         sfq_index       prev;
  };
@@ -108,13 +109,12 @@ struct sfq_slot {
         short           allot; /* credit for this slot */
  };
  
-struct sfq_sched_data
-{
+struct sfq_sched_data {
  /* Parameters */
         int             perturb_period;
-       unsigned        quantum;        /* Allotment per round: MUST BE >= MTU */
+       unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
         int             limit;
-
+       unsigned int    divisor;        /* number of slots in hash table */
  /* Variables */
         struct tcf_proto *filter_list;
         struct timer_list perturb_timer;
@@ -122,7 +122,7 @@ struct sfq_sched_data
         sfq_index       cur_depth;      /* depth of longest slot */
         unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
         struct sfq_slot *tail;          /* current slot in round */
-       sfq_index       ht[SFQ_HASH_DIVISOR];   /* Hash table */
+       sfq_index       *ht;            /* Hash table (divisor slots) */
         struct sfq_slot slots[SFQ_SLOTS];
         struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
  };
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
         return &q->dep[val - SFQ_SLOTS];
  }
  
-static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
  {
-       return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+       return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
  }
  
-static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
  {
         u32 h, h2;
  
@@ -157,13 +157,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
                 iph = ip_hdr(skb);
                 h = (__force u32)iph->daddr;
                 h2 = (__force u32)iph->saddr ^ iph->protocol;
-               if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+               if (iph->frag_off & htons(IP_MF | IP_OFFSET))
                         break;
                 poff = proto_ports_offset(iph->protocol);
                 if (poff >= 0 &&
                     pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
                         iph = ip_hdr(skb);
-                       h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
+                       h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
                 }
                 break;
         }
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
                 if (poff >= 0 &&
                     pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
                         iph = ipv6_hdr(skb);
-                       h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
+                       h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
                 }
                 break;
         }
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
  
         if (TC_H_MAJ(skb->priority) == sch->handle &&
             TC_H_MIN(skb->priority) > 0 &&
-           TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+           TC_H_MIN(skb->priority) <= q->divisor)
                 return TC_H_MIN(skb->priority);
  
         if (!q->filter_list)
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                         return 0;
                 }
  #endif
-               if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+               if (TC_H_MIN(res.classid) <= q->divisor)
                         return TC_H_MIN(res.classid);
         }
         return 0;
@@ -497,7 +497,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
         q->perturb_period = ctl->perturb_period * HZ;
         if (ctl->limit)
                 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-
+       if (ctl->divisor) {
+               if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)
+                       return -EINVAL;
+               q->divisor = ctl->divisor;
+       }
         qlen = sch->q.qlen;
         while (sch->q.qlen > q->limit)
                 sfq_drop(sch);
@@ -515,15 +519,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
  static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
  {
         struct sfq_sched_data *q = qdisc_priv(sch);
+       size_t sz;
         int i;
  
         q->perturb_timer.function = sfq_perturbation;
         q->perturb_timer.data = (unsigned long)sch;
         init_timer_deferrable(&q->perturb_timer);
  
-       for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-               q->ht[i] = SFQ_EMPTY_SLOT;
-
         for (i = 0; i < SFQ_DEPTH; i++) {
                 q->dep[i].next = i + SFQ_SLOTS;
                 q->dep[i].prev = i + SFQ_SLOTS;
@@ -532,6 +534,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
         q->limit = SFQ_DEPTH - 1;
         q->cur_depth = 0;
         q->tail = NULL;
+       q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
         if (opt == NULL) {
                 q->quantum = psched_mtu(qdisc_dev(sch));
                 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -543,10 +546,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
                         return err;
         }
  
+       sz = sizeof(q->ht[0]) * q->divisor;
+       q->ht = kmalloc(sz, GFP_KERNEL);
+       if (!q->ht && sz > PAGE_SIZE)
+               q->ht = vmalloc(sz);
+       if (!q->ht)
+               return -ENOMEM;
+       for (i = 0; i < q->divisor; i++)
+               q->ht[i] = SFQ_EMPTY_SLOT;
+
         for (i = 0; i < SFQ_SLOTS; i++) {
                 slot_queue_init(&q->slots[i]);
                 sfq_link(q, i);
         }
+       if (q->limit >= 1)
+               sch->flags |= TCQ_F_CAN_BYPASS;
+       else
+               sch->flags &= ~TCQ_F_CAN_BYPASS;
         return 0;
  }
  
@@ -557,6 +573,10 @@ static void sfq_destroy(struct Qdisc *sch)
         tcf_destroy_chain(&q->filter_list);
         q->perturb_period = 0;
         del_timer_sync(&q->perturb_timer);
+       if (is_vmalloc_addr(q->ht))
+               vfree(q->ht);
+       else
+               kfree(q->ht);
  }
  
  static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -569,7 +589,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
         opt.perturb_period = q->perturb_period / HZ;
  
         opt.limit = q->limit;
-       opt.divisor = SFQ_HASH_DIVISOR;
+       opt.divisor = q->divisor;
         opt.flows = q->limit;
  
         NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -594,6 +614,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
  static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
                               u32 classid)
  {
+       /* we cannot bypass queue discipline anymore */
+       sch->flags &= ~TCQ_F_CAN_BYPASS;
         return 0;
  }
  
@@ -647,7 +669,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
         if (arg->stop)
                 return;
  
-       for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+       for (i = 0; i < q->divisor; i++) {
                 if (q->ht[i] == SFQ_EMPTY_SLOT ||
                     arg->count < arg->skip) {
                         arg->count++;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c

index e931658..1dcfb52 100644 (file)
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
         changed the limit is not effective anymore.
  */
  
-struct tbf_sched_data
-{
+struct tbf_sched_data {
  /* Parameters */
         u32             limit;          /* Maximal length of backlog: bytes */
         u32             buffer;         /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
         struct qdisc_watchdog watchdog; /* Watchdog timer */
  };
  
-#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
-#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
+#define L2T(q, L)   qdisc_l2t((q)->R_tab, L)
+#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
  
-static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
         struct tbf_sched_data *q = qdisc_priv(sch);
         int ret;
@@ -137,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
         return NET_XMIT_SUCCESS;
  }
  
-static unsigned int tbf_drop(struct Qdisc* sch)
+static unsigned int tbf_drop(struct Qdisc *sch)
  {
         struct tbf_sched_data *q = qdisc_priv(sch);
         unsigned int len = 0;
@@ -149,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
         return len;
  }
  
-static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
+static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
  {
         struct tbf_sched_data *q = qdisc_priv(sch);
         struct sk_buff *skb;
@@ -185,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
                         q->tokens = toks;
                         q->ptokens = ptoks;
                         sch->q.qlen--;
-                       sch->flags &= ~TCQ_F_THROTTLED;
+                       qdisc_unthrottled(sch);
                         qdisc_bstats_update(sch, skb);
                         return skb;
                 }
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
         return NULL;
  }
  
-static void tbf_reset(struct Qdisc* sch)
+static void tbf_reset(struct Qdisc *sch)
  {
         struct tbf_sched_data *q = qdisc_priv(sch);
  
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
         [TCA_TBF_PTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
  };
  
-static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
  {
         int err;
         struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
         struct qdisc_rate_table *rtab = NULL;
         struct qdisc_rate_table *ptab = NULL;
         struct Qdisc *child = NULL;
-       int max_size,n;
+       int max_size, n;
  
         err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
         if (err < 0)
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
         }
  
         for (n = 0; n < 256; n++)
-               if (rtab->data[n] > qopt->buffer) break;
-       max_size = (n << qopt->rate.cell_log)-1;
+               if (rtab->data[n] > qopt->buffer)
+                       break;
+       max_size = (n << qopt->rate.cell_log) - 1;
         if (ptab) {
                 int size;
  
                 for (n = 0; n < 256; n++)
-                       if (ptab->data[n] > qopt->mtu) break;
-               size = (n << qopt->peakrate.cell_log)-1;
-               if (size < max_size) max_size = size;
+                       if (ptab->data[n] > qopt->mtu)
+                               break;
+               size = (n << qopt->peakrate.cell_log) - 1;
+               if (size < max_size)
+                       max_size = size;
         }
         if (max_size < 0)
                 goto done;
@@ -310,7 +312,7 @@ done:
         return err;
  }
  
-static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
+static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
  {
         struct tbf_sched_data *q = qdisc_priv(sch);
  
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
         }
  }
  
-static const struct Qdisc_class_ops tbf_class_ops =
-{
+static const struct Qdisc_class_ops tbf_class_ops = {
         .graft          =       tbf_graft,
         .leaf           =       tbf_leaf,
         .get            =       tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c

index d84e732..45cd300 100644 (file)
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,8 +53,7 @@
        which will not break load balancing, though native slave
        traffic will have the highest priority.  */
  
-struct teql_master
-{
+struct teql_master {
         struct Qdisc_ops qops;
         struct net_device *dev;
         struct Qdisc *slaves;
@@ -65,22 +64,21 @@ struct teql_master
         unsigned long   tx_dropped;
  };
  
-struct teql_sched_data
-{
+struct teql_sched_data {
         struct Qdisc *next;
         struct teql_master *m;
         struct neighbour *ncache;
         struct sk_buff_head q;
  };
  
-#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
  
-#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
+#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
  
  /* "teql*" qdisc routines */
  
  static int
-teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
  {
         struct net_device *dev = qdisc_dev(sch);
         struct teql_sched_data *q = qdisc_priv(sch);
@@ -96,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  }
  
  static struct sk_buff *
-teql_dequeue(struct Qdisc* sch)
+teql_dequeue(struct Qdisc *sch)
  {
         struct teql_sched_data *dat = qdisc_priv(sch);
         struct netdev_queue *dat_queue;
@@ -118,13 +116,13 @@ teql_dequeue(struct Qdisc* sch)
  }
  
  static struct sk_buff *
-teql_peek(struct Qdisc* sch)
+teql_peek(struct Qdisc *sch)
  {
         /* teql is meant to be used as root qdisc */
         return NULL;
  }
  
-static __inline__ void
+static inline void
  teql_neigh_release(struct neighbour *n)
  {
         if (n)
@@ -132,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
  }
  
  static void
-teql_reset(struct Qdisc* sch)
+teql_reset(struct Qdisc *sch)
  {
         struct teql_sched_data *dat = qdisc_priv(sch);
  
@@ -142,13 +140,14 @@ teql_reset(struct Qdisc* sch)
  }
  
  static void
-teql_destroy(struct Qdisc* sch)
+teql_destroy(struct Qdisc *sch)
  {
         struct Qdisc *q, *prev;
         struct teql_sched_data *dat = qdisc_priv(sch);
         struct teql_master *master = dat->m;
  
-       if ((prev = master->slaves) != NULL) {
+       prev = master->slaves;
+       if (prev) {
                 do {
                         q = NEXT_SLAVE(prev);
                         if (q == sch) {
@@ -180,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
  static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
  {
         struct net_device *dev = qdisc_dev(sch);
-       struct teql_master *m = (struct teql_master*)sch->ops;
+       struct teql_master *m = (struct teql_master *)sch->ops;
         struct teql_sched_data *q = qdisc_priv(sch);
  
         if (dev->hard_header_len > m->dev->hard_header_len)
@@ -291,7 +290,8 @@ restart:
         nores = 0;
         busy = 0;
  
-       if ((q = start) == NULL)
+       q = start;
+       if (!q)
                 goto drop;
  
         do {
@@ -356,10 +356,10 @@ drop:
  
  static int teql_master_open(struct net_device *dev)
  {
-       struct Qdisc * q;
+       struct Qdisc *q;
         struct teql_master *m = netdev_priv(dev);
         int mtu = 0xFFFE;
-       unsigned flags = IFF_NOARP|IFF_MULTICAST;
+       unsigned int flags = IFF_NOARP | IFF_MULTICAST;
  
         if (m->slaves == NULL)
                 return -EUNATCH;
@@ -427,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
                 do {
                         if (new_mtu > qdisc_dev(q)->mtu)
                                 return -EINVAL;
-               } while ((q=NEXT_SLAVE(q)) != m->slaves);
+               } while ((q = NEXT_SLAVE(q)) != m->slaves);
         }
  
         dev->mtu = new_mtu;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

index dd419d2..d8d98d5 100644 (file)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1475,6 +1475,12 @@ restart:
                         goto out_free;
         }
  
+       if (sk_filter(other, skb) < 0) {
+               /* Toss the packet but do not return any error to the sender */
+               err = len;
+               goto out_free;
+       }
+
         unix_state_lock(other);
         err = -EPERM;
         if (!unix_may_send(sk, other))
@@ -1978,36 +1984,38 @@ static int unix_shutdown(struct socket *sock, int mode)
  
         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
  
-       if (mode) {
-               unix_state_lock(sk);
-               sk->sk_shutdown |= mode;
-               other = unix_peer(sk);
-               if (other)
-                       sock_hold(other);
-               unix_state_unlock(sk);
-               sk->sk_state_change(sk);
-
-               if (other &&
-                       (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
-
-                       int peer_mode = 0;
-
-                       if (mode&RCV_SHUTDOWN)
-                               peer_mode |= SEND_SHUTDOWN;
-                       if (mode&SEND_SHUTDOWN)
-                               peer_mode |= RCV_SHUTDOWN;
-                       unix_state_lock(other);
-                       other->sk_shutdown |= peer_mode;
-                       unix_state_unlock(other);
-                       other->sk_state_change(other);
-                       if (peer_mode == SHUTDOWN_MASK)
-                               sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
-                       else if (peer_mode & RCV_SHUTDOWN)
-                               sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-               }
-               if (other)
-                       sock_put(other);
+       if (!mode)
+               return 0;
+
+       unix_state_lock(sk);
+       sk->sk_shutdown |= mode;
+       other = unix_peer(sk);
+       if (other)
+               sock_hold(other);
+       unix_state_unlock(sk);
+       sk->sk_state_change(sk);
+
+       if (other &&
+               (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
+
+               int peer_mode = 0;
+
+               if (mode&RCV_SHUTDOWN)
+                       peer_mode |= SEND_SHUTDOWN;
+               if (mode&SEND_SHUTDOWN)
+                       peer_mode |= RCV_SHUTDOWN;
+               unix_state_lock(other);
+               other->sk_shutdown |= peer_mode;
+               unix_state_unlock(other);
+               other->sk_state_change(other);
+               if (peer_mode == SHUTDOWN_MASK)
+                       sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
+               else if (peer_mode & RCV_SHUTDOWN)
+                       sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
         }
+       if (other)
+               sock_put(other);
+
         return 0;
  }
  
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c

index 74944a2..788a12c 100644 (file)
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -59,8 +59,6 @@
  #include <asm/uaccess.h>        /* copy_to/from_user */
  #include <linux/init.h>         /* __initfunc et al. */
  
-#define KMEM_SAFETYZONE 8
-
  #define DEV_TO_SLAVE(dev)      (*((struct net_device **)netdev_priv(dev)))
  
  /*
author	David S. Miller <davem@davemloft.net>
	Mon, 24 Jan 2011 22:09:35 +0000 (14:09 -0800)
committer	David S. Miller <davem@davemloft.net>
	Mon, 24 Jan 2011 22:09:35 +0000 (14:09 -0800)
Documentation/feature-removal-schedule.txt		patch \| blob \| history
drivers/net/atl1c/atl1c_hw.c		patch \| blob \| history
drivers/net/atl1c/atl1c_hw.h		patch \| blob \| history
drivers/net/atl1e/atl1e_ethtool.c		patch \| blob \| history
drivers/net/atl1e/atl1e_hw.c		patch \| blob \| history
drivers/net/atl1e/atl1e_hw.h		patch \| blob \| history
drivers/net/atl1e/atl1e_main.c		patch \| blob \| history
drivers/net/e1000e/e1000.h		patch \| blob \| history
drivers/net/e1000e/ethtool.c		patch \| blob \| history
drivers/net/e1000e/ich8lan.c		patch \| blob \| history
drivers/net/e1000e/lib.c		patch \| blob \| history
drivers/net/e1000e/netdev.c		patch \| blob \| history
drivers/net/e1000e/phy.c		patch \| blob \| history
drivers/net/enic/enic.h		patch \| blob \| history
drivers/net/enic/enic_main.c		patch \| blob \| history
drivers/net/igb/e1000_82575.c		patch \| blob \| history
drivers/net/igb/e1000_hw.h		patch \| blob \| history
drivers/net/igb/igb_main.c		patch \| blob \| history
drivers/net/ppp_generic.c		patch \| blob \| history
drivers/net/via-velocity.c		patch \| blob \| history
drivers/net/via-velocity.h		patch \| blob \| history
drivers/net/vxge/vxge-config.c		patch \| blob \| history
drivers/net/vxge/vxge-config.h		patch \| blob \| history
drivers/net/vxge/vxge-main.c		patch \| blob \| history
drivers/net/vxge/vxge-main.h		patch \| blob \| history
drivers/net/vxge/vxge-traffic.c		patch \| blob \| history
drivers/net/vxge/vxge-traffic.h		patch \| blob \| history
drivers/net/vxge/vxge-version.h		patch \| blob \| history
include/linux/audit.h		patch \| blob \| history
include/linux/dccp.h		patch \| blob \| history
include/linux/if_link.h		patch \| blob \| history
include/linux/ip_vs.h		patch \| blob \| history
include/linux/netdevice.h		patch \| blob \| history
include/linux/netfilter.h		patch \| blob \| history
include/linux/netfilter/Kbuild		patch \| blob \| history
include/linux/netfilter/nf_conntrack_snmp.h	[new file with mode: 0644]	patch \| blob
include/linux/netfilter/nfnetlink_conntrack.h		patch \| blob \| history
include/linux/netfilter/x_tables.h		patch \| blob \| history
include/linux/netfilter/xt_AUDIT.h	[new file with mode: 0644]	patch \| blob
include/linux/netfilter/xt_CT.h		patch \| blob \| history
include/linux/netfilter/xt_NFQUEUE.h		patch \| blob \| history
include/linux/netfilter/xt_TCPOPTSTRIP.h		patch \| blob \| history
include/linux/netfilter/xt_TPROXY.h		patch \| blob \| history
include/linux/netfilter/xt_cluster.h		patch \| blob \| history
include/linux/netfilter/xt_comment.h		patch \| blob \| history
include/linux/netfilter/xt_connlimit.h		patch \| blob \| history
include/linux/netfilter/xt_conntrack.h		patch \| blob \| history
include/linux/netfilter/xt_quota.h		patch \| blob \| history
include/linux/netfilter/xt_socket.h		patch \| blob \| history
include/linux/netfilter/xt_time.h		patch \| blob \| history
include/linux/netfilter/xt_u32.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_802_3.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_among.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_arp.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_ip.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_ip6.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_limit.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_log.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_mark_m.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_nflog.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_pkttype.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_stp.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_ulog.h		patch \| blob \| history
include/linux/netfilter_bridge/ebt_vlan.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_CLUSTERIP.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_ECN.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_SAME.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_TTL.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_addrtype.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_ah.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_ecn.h		patch \| blob \| history
include/linux/netfilter_ipv4/ipt_ttl.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_HL.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_REJECT.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_ah.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_frag.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_hl.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_ipv6header.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_mh.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_opts.h		patch \| blob \| history
include/linux/netfilter_ipv6/ip6t_rt.h		patch \| blob \| history
include/linux/pkt_sched.h		patch \| blob \| history
include/linux/skbuff.h		patch \| blob \| history
include/net/dst.h		patch \| blob \| history
include/net/ip_fib.h		patch \| blob \| history
include/net/ip_vs.h		patch \| blob \| history
include/net/net_namespace.h		patch \| blob \| history
include/net/netfilter/nf_conntrack.h		patch \| blob \| history
include/net/netfilter/nf_conntrack_ecache.h		patch \| blob \| history
include/net/netfilter/nf_conntrack_extend.h		patch \| blob \| history
include/net/netfilter/nf_conntrack_helper.h		patch \| blob \| history
include/net/netfilter/nf_conntrack_l3proto.h		patch \| blob \| history
include/net/netfilter/nf_conntrack_timestamp.h	[new file with mode: 0644]	patch \| blob
include/net/netfilter/nf_nat.h		patch \| blob \| history
include/net/netfilter/nf_nat_core.h		patch \| blob \| history
include/net/netns/conntrack.h		patch \| blob \| history
include/net/netns/ip_vs.h	[new file with mode: 0644]	patch \| blob
include/net/netns/ipv4.h		patch \| blob \| history
include/net/sch_generic.h		patch \| blob \| history
include/net/sock.h		patch \| blob \| history
kernel/audit.c		patch \| blob \| history
net/9p/trans_rdma.c		patch \| blob \| history
net/bridge/netfilter/ebt_ip6.c		patch \| blob \| history
net/bridge/netfilter/ebtables.c		patch \| blob \| history
net/caif/cfcnfg.c		patch \| blob \| history
net/caif/cfdgml.c		patch \| blob \| history
net/caif/cfserl.c		patch \| blob \| history
net/caif/cfutill.c		patch \| blob \| history
net/caif/cfveil.c		patch \| blob \| history
net/core/dev.c		patch \| blob \| history
net/core/filter.c		patch \| blob \| history
net/core/neighbour.c		patch \| blob \| history
net/core/rtnetlink.c		patch \| blob \| history
net/decnet/dn_table.c		patch \| blob \| history
net/ipv4/Kconfig		patch \| blob \| history
net/ipv4/fib_rules.c		patch \| blob \| history
net/ipv4/fib_semantics.c		patch \| blob \| history
net/ipv4/ip_input.c		patch \| blob \| history
net/ipv4/netfilter/Kconfig		patch \| blob \| history
net/ipv4/netfilter/arp_tables.c		patch \| blob \| history
net/ipv4/netfilter/ip_tables.c		patch \| blob \| history
net/ipv4/netfilter/ipt_CLUSTERIP.c		patch \| blob \| history
net/ipv4/netfilter/ipt_LOG.c		patch \| blob \| history
net/ipv4/netfilter/iptable_mangle.c		patch \| blob \| history
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c		patch \| blob \| history
net/ipv4/netfilter/nf_nat_amanda.c		patch \| blob \| history
net/ipv4/netfilter/nf_nat_core.c		patch \| blob \| history
net/ipv4/netfilter/nf_nat_snmp_basic.c		patch \| blob \| history
net/ipv4/route.c		patch \| blob \| history
net/ipv6/netfilter/ip6_tables.c		patch \| blob \| history
net/ipv6/netfilter/ip6t_LOG.c		patch \| blob \| history
net/ipv6/netfilter/nf_conntrack_reasm.c		patch \| blob \| history
net/ipv6/raw.c		patch \| blob \| history
net/ipv6/sit.c		patch \| blob \| history
net/netfilter/Kconfig		patch \| blob \| history
net/netfilter/Makefile		patch \| blob \| history
net/netfilter/core.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_app.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_conn.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_core.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_ctl.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_est.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_ftp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_lblc.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_lblcr.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_nfct.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_pe.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_pe_sip.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto_ah_esp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto_sctp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto_tcp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_proto_udp.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_sync.c		patch \| blob \| history
net/netfilter/ipvs/ip_vs_xmit.c		patch \| blob \| history
net/netfilter/nf_conntrack_broadcast.c	[new file with mode: 0644]	patch \| blob
net/netfilter/nf_conntrack_core.c		patch \| blob \| history
net/netfilter/nf_conntrack_expect.c		patch \| blob \| history
net/netfilter/nf_conntrack_extend.c		patch \| blob \| history
net/netfilter/nf_conntrack_helper.c		patch \| blob \| history
net/netfilter/nf_conntrack_netbios_ns.c		patch \| blob \| history
net/netfilter/nf_conntrack_netlink.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_dccp.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_sctp.c		patch \| blob \| history
net/netfilter/nf_conntrack_proto_tcp.c		patch \| blob \| history
net/netfilter/nf_conntrack_snmp.c	[new file with mode: 0644]	patch \| blob
net/netfilter/nf_conntrack_standalone.c		patch \| blob \| history
net/netfilter/nf_conntrack_timestamp.c	[new file with mode: 0644]	patch \| blob
net/netfilter/nf_log.c		patch \| blob \| history
net/netfilter/nf_queue.c		patch \| blob \| history
net/netfilter/nfnetlink_log.c		patch \| blob \| history
net/netfilter/nfnetlink_queue.c		patch \| blob \| history
net/netfilter/x_tables.c		patch \| blob \| history
net/netfilter/xt_AUDIT.c	[new file with mode: 0644]	patch \| blob
net/netfilter/xt_CLASSIFY.c		patch \| blob \| history
net/netfilter/xt_IDLETIMER.c		patch \| blob \| history
net/netfilter/xt_LED.c		patch \| blob \| history
net/netfilter/xt_NFQUEUE.c		patch \| blob \| history
net/netfilter/xt_connlimit.c		patch \| blob \| history
net/netfilter/xt_conntrack.c		patch \| blob \| history
net/netfilter/xt_cpu.c		patch \| blob \| history
net/netfilter/xt_ipvs.c		patch \| blob \| history
net/packet/af_packet.c		patch \| blob \| history
net/rds/rds.h		patch \| blob \| history
net/sched/Kconfig		patch \| blob \| history
net/sched/Makefile		patch \| blob \| history
net/sched/act_api.c		patch \| blob \| history
net/sched/act_csum.c		patch \| blob \| history
net/sched/act_gact.c		patch \| blob \| history
net/sched/act_ipt.c		patch \| blob \| history
net/sched/act_mirred.c		patch \| blob \| history
net/sched/act_nat.c		patch \| blob \| history
net/sched/act_pedit.c		patch \| blob \| history
net/sched/act_police.c		patch \| blob \| history
net/sched/act_simple.c		patch \| blob \| history
net/sched/act_skbedit.c		patch \| blob \| history
net/sched/cls_api.c		patch \| blob \| history
net/sched/cls_basic.c		patch \| blob \| history
net/sched/cls_cgroup.c		patch \| blob \| history
net/sched/cls_flow.c		patch \| blob \| history
net/sched/cls_fw.c		patch \| blob \| history
net/sched/cls_route.c		patch \| blob \| history
net/sched/cls_rsvp.h		patch \| blob \| history
net/sched/cls_tcindex.c		patch \| blob \| history
net/sched/cls_u32.c		patch \| blob \| history
net/sched/em_cmp.c		patch \| blob \| history
net/sched/em_meta.c		patch \| blob \| history
net/sched/em_nbyte.c		patch \| blob \| history
net/sched/em_text.c		patch \| blob \| history
net/sched/em_u32.c		patch \| blob \| history
net/sched/ematch.c		patch \| blob \| history
net/sched/sch_api.c		patch \| blob \| history
net/sched/sch_atm.c		patch \| blob \| history
net/sched/sch_cbq.c		patch \| blob \| history
net/sched/sch_dsmark.c		patch \| blob \| history
net/sched/sch_fifo.c		patch \| blob \| history
net/sched/sch_generic.c		patch \| blob \| history
net/sched/sch_gred.c		patch \| blob \| history
net/sched/sch_hfsc.c		patch \| blob \| history
net/sched/sch_htb.c		patch \| blob \| history
net/sched/sch_mq.c		patch \| blob \| history
net/sched/sch_mqprio.c	[new file with mode: 0644]	patch \| blob
net/sched/sch_multiq.c		patch \| blob \| history
net/sched/sch_netem.c		patch \| blob \| history
net/sched/sch_prio.c		patch \| blob \| history
net/sched/sch_red.c		patch \| blob \| history
net/sched/sch_sfq.c		patch \| blob \| history
net/sched/sch_tbf.c		patch \| blob \| history
net/sched/sch_teql.c		patch \| blob \| history
net/unix/af_unix.c		patch \| blob \| history
net/wanrouter/wanmain.c		patch \| blob \| history