Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
authorDavid S. Miller <davem@davemloft.net>
Mon, 24 Jan 2011 22:09:35 +0000 (14:09 -0800)
committerDavid S. Miller <davem@davemloft.net>
Mon, 24 Jan 2011 22:09:35 +0000 (14:09 -0800)
Conflicts:
net/sched/sch_hfsc.c
net/sched/sch_htb.c
net/sched/sch_tbf.c

232 files changed:
Documentation/feature-removal-schedule.txt
drivers/net/atl1c/atl1c_hw.c
drivers/net/atl1c/atl1c_hw.h
drivers/net/atl1e/atl1e_ethtool.c
drivers/net/atl1e/atl1e_hw.c
drivers/net/atl1e/atl1e_hw.h
drivers/net/atl1e/atl1e_main.c
drivers/net/e1000e/e1000.h
drivers/net/e1000e/ethtool.c
drivers/net/e1000e/ich8lan.c
drivers/net/e1000e/lib.c
drivers/net/e1000e/netdev.c
drivers/net/e1000e/phy.c
drivers/net/enic/enic.h
drivers/net/enic/enic_main.c
drivers/net/igb/e1000_82575.c
drivers/net/igb/e1000_hw.h
drivers/net/igb/igb_main.c
drivers/net/ppp_generic.c
drivers/net/via-velocity.c
drivers/net/via-velocity.h
drivers/net/vxge/vxge-config.c
drivers/net/vxge/vxge-config.h
drivers/net/vxge/vxge-main.c
drivers/net/vxge/vxge-main.h
drivers/net/vxge/vxge-traffic.c
drivers/net/vxge/vxge-traffic.h
drivers/net/vxge/vxge-version.h
include/linux/audit.h
include/linux/dccp.h
include/linux/if_link.h
include/linux/ip_vs.h
include/linux/netdevice.h
include/linux/netfilter.h
include/linux/netfilter/Kbuild
include/linux/netfilter/nf_conntrack_snmp.h [new file with mode: 0644]
include/linux/netfilter/nfnetlink_conntrack.h
include/linux/netfilter/x_tables.h
include/linux/netfilter/xt_AUDIT.h [new file with mode: 0644]
include/linux/netfilter/xt_CT.h
include/linux/netfilter/xt_NFQUEUE.h
include/linux/netfilter/xt_TCPOPTSTRIP.h
include/linux/netfilter/xt_TPROXY.h
include/linux/netfilter/xt_cluster.h
include/linux/netfilter/xt_comment.h
include/linux/netfilter/xt_connlimit.h
include/linux/netfilter/xt_conntrack.h
include/linux/netfilter/xt_quota.h
include/linux/netfilter/xt_socket.h
include/linux/netfilter/xt_time.h
include/linux/netfilter/xt_u32.h
include/linux/netfilter_bridge/ebt_802_3.h
include/linux/netfilter_bridge/ebt_among.h
include/linux/netfilter_bridge/ebt_arp.h
include/linux/netfilter_bridge/ebt_ip.h
include/linux/netfilter_bridge/ebt_ip6.h
include/linux/netfilter_bridge/ebt_limit.h
include/linux/netfilter_bridge/ebt_log.h
include/linux/netfilter_bridge/ebt_mark_m.h
include/linux/netfilter_bridge/ebt_nflog.h
include/linux/netfilter_bridge/ebt_pkttype.h
include/linux/netfilter_bridge/ebt_stp.h
include/linux/netfilter_bridge/ebt_ulog.h
include/linux/netfilter_bridge/ebt_vlan.h
include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
include/linux/netfilter_ipv4/ipt_ECN.h
include/linux/netfilter_ipv4/ipt_SAME.h
include/linux/netfilter_ipv4/ipt_TTL.h
include/linux/netfilter_ipv4/ipt_addrtype.h
include/linux/netfilter_ipv4/ipt_ah.h
include/linux/netfilter_ipv4/ipt_ecn.h
include/linux/netfilter_ipv4/ipt_ttl.h
include/linux/netfilter_ipv6/ip6t_HL.h
include/linux/netfilter_ipv6/ip6t_REJECT.h
include/linux/netfilter_ipv6/ip6t_ah.h
include/linux/netfilter_ipv6/ip6t_frag.h
include/linux/netfilter_ipv6/ip6t_hl.h
include/linux/netfilter_ipv6/ip6t_ipv6header.h
include/linux/netfilter_ipv6/ip6t_mh.h
include/linux/netfilter_ipv6/ip6t_opts.h
include/linux/netfilter_ipv6/ip6t_rt.h
include/linux/pkt_sched.h
include/linux/skbuff.h
include/net/dst.h
include/net/ip_fib.h
include/net/ip_vs.h
include/net/net_namespace.h
include/net/netfilter/nf_conntrack.h
include/net/netfilter/nf_conntrack_ecache.h
include/net/netfilter/nf_conntrack_extend.h
include/net/netfilter/nf_conntrack_helper.h
include/net/netfilter/nf_conntrack_l3proto.h
include/net/netfilter/nf_conntrack_timestamp.h [new file with mode: 0644]
include/net/netfilter/nf_nat.h
include/net/netfilter/nf_nat_core.h
include/net/netns/conntrack.h
include/net/netns/ip_vs.h [new file with mode: 0644]
include/net/netns/ipv4.h
include/net/sch_generic.h
include/net/sock.h
kernel/audit.c
net/9p/trans_rdma.c
net/bridge/netfilter/ebt_ip6.c
net/bridge/netfilter/ebtables.c
net/caif/cfcnfg.c
net/caif/cfdgml.c
net/caif/cfserl.c
net/caif/cfutill.c
net/caif/cfveil.c
net/core/dev.c
net/core/filter.c
net/core/neighbour.c
net/core/rtnetlink.c
net/decnet/dn_table.c
net/ipv4/Kconfig
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/ip_input.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_LOG.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
net/ipv4/netfilter/nf_nat_amanda.c
net/ipv4/netfilter/nf_nat_core.c
net/ipv4/netfilter/nf_nat_snmp_basic.c
net/ipv4/route.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_LOG.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/raw.c
net/ipv6/sit.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/core.c
net/netfilter/ipvs/ip_vs_app.c
net/netfilter/ipvs/ip_vs_conn.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_est.c
net/netfilter/ipvs/ip_vs_ftp.c
net/netfilter/ipvs/ip_vs_lblc.c
net/netfilter/ipvs/ip_vs_lblcr.c
net/netfilter/ipvs/ip_vs_nfct.c
net/netfilter/ipvs/ip_vs_pe.c
net/netfilter/ipvs/ip_vs_pe_sip.c
net/netfilter/ipvs/ip_vs_proto.c
net/netfilter/ipvs/ip_vs_proto_ah_esp.c
net/netfilter/ipvs/ip_vs_proto_sctp.c
net/netfilter/ipvs/ip_vs_proto_tcp.c
net/netfilter/ipvs/ip_vs_proto_udp.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/ipvs/ip_vs_xmit.c
net/netfilter/nf_conntrack_broadcast.c [new file with mode: 0644]
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_expect.c
net/netfilter/nf_conntrack_extend.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netbios_ns.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_conntrack_snmp.c [new file with mode: 0644]
net/netfilter/nf_conntrack_standalone.c
net/netfilter/nf_conntrack_timestamp.c [new file with mode: 0644]
net/netfilter/nf_log.c
net/netfilter/nf_queue.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/x_tables.c
net/netfilter/xt_AUDIT.c [new file with mode: 0644]
net/netfilter/xt_CLASSIFY.c
net/netfilter/xt_IDLETIMER.c
net/netfilter/xt_LED.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_connlimit.c
net/netfilter/xt_conntrack.c
net/netfilter/xt_cpu.c
net/netfilter/xt_ipvs.c
net/packet/af_packet.c
net/rds/rds.h
net/sched/Kconfig
net/sched/Makefile
net/sched/act_api.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/cls_api.c
net/sched/cls_basic.c
net/sched/cls_cgroup.c
net/sched/cls_flow.c
net/sched/cls_fw.c
net/sched/cls_route.c
net/sched/cls_rsvp.h
net/sched/cls_tcindex.c
net/sched/cls_u32.c
net/sched/em_cmp.c
net/sched/em_meta.c
net/sched/em_nbyte.c
net/sched/em_text.c
net/sched/em_u32.c
net/sched/ematch.c
net/sched/sch_api.c
net/sched/sch_atm.c
net/sched/sch_cbq.c
net/sched/sch_dsmark.c
net/sched/sch_fifo.c
net/sched/sch_generic.c
net/sched/sch_gred.c
net/sched/sch_hfsc.c
net/sched/sch_htb.c
net/sched/sch_mq.c
net/sched/sch_mqprio.c [new file with mode: 0644]
net/sched/sch_multiq.c
net/sched/sch_netem.c
net/sched/sch_prio.c
net/sched/sch_red.c
net/sched/sch_sfq.c
net/sched/sch_tbf.c
net/sched/sch_teql.c
net/unix/af_unix.c
net/wanrouter/wanmain.c

index b959659..ccb6048 100644 (file)
@@ -603,3 +603,10 @@ Why:       The adm9240, w83792d and w83793 hardware monitoring drivers have
 Who:   Jean Delvare <khali@linux-fr.org>
 
 ----------------------------
+
+What:  xt_connlimit rev 0
+When:  2012
+Who:   Jan Engelhardt <jengelh@medozas.de>
+Files: net/netfilter/xt_connlimit.c
+
+----------------------------
index 1bf6720..23f2ab0 100644 (file)
@@ -345,7 +345,7 @@ int atl1c_write_phy_reg(struct atl1c_hw *hw, u32 reg_addr, u16 phy_data)
  */
 static int atl1c_phy_setup_adv(struct atl1c_hw *hw)
 {
-       u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_SPEED_MASK;
+       u16 mii_adv_data = ADVERTISE_DEFAULT_CAP & ~ADVERTISE_ALL;
        u16 mii_giga_ctrl_data = GIGA_CR_1000T_DEFAULT_CAP &
                                ~GIGA_CR_1000T_SPEED_MASK;
 
@@ -373,7 +373,7 @@ static int atl1c_phy_setup_adv(struct atl1c_hw *hw)
        }
 
        if (atl1c_write_phy_reg(hw, MII_ADVERTISE, mii_adv_data) != 0 ||
-           atl1c_write_phy_reg(hw, MII_GIGA_CR, mii_giga_ctrl_data) != 0)
+           atl1c_write_phy_reg(hw, MII_CTRL1000, mii_giga_ctrl_data) != 0)
                return -1;
        return 0;
 }
@@ -517,19 +517,18 @@ int atl1c_phy_init(struct atl1c_hw *hw)
                                        "Error Setting up Auto-Negotiation\n");
                        return ret_val;
                }
-               mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG;
+               mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART;
                break;
        case MEDIA_TYPE_100M_FULL:
-               mii_bmcr_data |= BMCR_SPEED_100 | BMCR_FULL_DUPLEX;
+               mii_bmcr_data |= BMCR_SPEED100 | BMCR_FULLDPLX;
                break;
        case MEDIA_TYPE_100M_HALF:
-               mii_bmcr_data |= BMCR_SPEED_100;
+               mii_bmcr_data |= BMCR_SPEED100;
                break;
        case MEDIA_TYPE_10M_FULL:
-               mii_bmcr_data |= BMCR_SPEED_10 | BMCR_FULL_DUPLEX;
+               mii_bmcr_data |= BMCR_FULLDPLX;
                break;
        case MEDIA_TYPE_10M_HALF:
-               mii_bmcr_data |= BMCR_SPEED_10;
                break;
        default:
                if (netif_msg_link(adapter))
@@ -657,7 +656,7 @@ int atl1c_restart_autoneg(struct atl1c_hw *hw)
        err = atl1c_phy_setup_adv(hw);
        if (err)
                return err;
-       mii_bmcr_data |= BMCR_AUTO_NEG_EN | BMCR_RESTART_AUTO_NEG;
+       mii_bmcr_data |= BMCR_ANENABLE | BMCR_ANRESTART;
 
        return atl1c_write_phy_reg(hw, MII_BMCR, mii_bmcr_data);
 }
index 3dd6759..655fc6c 100644 (file)
@@ -736,55 +736,16 @@ int atl1c_phy_power_saving(struct atl1c_hw *hw);
 #define REG_DEBUG_DATA0                0x1900
 #define REG_DEBUG_DATA1                0x1904
 
-/* PHY Control Register */
-#define MII_BMCR                       0x00
-#define BMCR_SPEED_SELECT_MSB          0x0040  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define BMCR_COLL_TEST_ENABLE          0x0080  /* Collision test enable */
-#define BMCR_FULL_DUPLEX               0x0100  /* FDX =1, half duplex =0 */
-#define BMCR_RESTART_AUTO_NEG          0x0200  /* Restart auto negotiation */
-#define BMCR_ISOLATE                   0x0400  /* Isolate PHY from MII */
-#define BMCR_POWER_DOWN                        0x0800  /* Power down */
-#define BMCR_AUTO_NEG_EN               0x1000  /* Auto Neg Enable */
-#define BMCR_SPEED_SELECT_LSB          0x2000  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define BMCR_LOOPBACK                  0x4000  /* 0 = normal, 1 = loopback */
-#define BMCR_RESET                     0x8000  /* 0 = normal, 1 = PHY reset */
-#define BMCR_SPEED_MASK                        0x2040
-#define BMCR_SPEED_1000                        0x0040
-#define BMCR_SPEED_100                 0x2000
-#define BMCR_SPEED_10                  0x0000
-
-/* PHY Status Register */
-#define MII_BMSR                       0x01
-#define BMMSR_EXTENDED_CAPS            0x0001  /* Extended register capabilities */
-#define BMSR_JABBER_DETECT             0x0002  /* Jabber Detected */
-#define BMSR_LINK_STATUS               0x0004  /* Link Status 1 = link */
-#define BMSR_AUTONEG_CAPS              0x0008  /* Auto Neg Capable */
-#define BMSR_REMOTE_FAULT              0x0010  /* Remote Fault Detect */
-#define BMSR_AUTONEG_COMPLETE          0x0020  /* Auto Neg Complete */
-#define BMSR_PREAMBLE_SUPPRESS         0x0040  /* Preamble may be suppressed */
-#define BMSR_EXTENDED_STATUS           0x0100  /* Ext. status info in Reg 0x0F */
-#define BMSR_100T2_HD_CAPS             0x0200  /* 100T2 Half Duplex Capable */
-#define BMSR_100T2_FD_CAPS             0x0400  /* 100T2 Full Duplex Capable */
-#define BMSR_10T_HD_CAPS               0x0800  /* 10T   Half Duplex Capable */
-#define BMSR_10T_FD_CAPS               0x1000  /* 10T   Full Duplex Capable */
-#define BMSR_100X_HD_CAPS              0x2000  /* 100X  Half Duplex Capable */
-#define BMMII_SR_100X_FD_CAPS          0x4000  /* 100X  Full Duplex Capable */
-#define BMMII_SR_100T4_CAPS            0x8000  /* 100T4 Capable */
-
-#define MII_PHYSID1                    0x02
-#define MII_PHYSID2                    0x03
 #define L1D_MPW_PHYID1                 0xD01C  /* V7 */
 #define L1D_MPW_PHYID2                 0xD01D  /* V1-V6 */
 #define L1D_MPW_PHYID3                 0xD01E  /* V8 */
 
 
 /* Autoneg Advertisement Register */
-#define MII_ADVERTISE                  0x04
-#define ADVERTISE_SPEED_MASK           0x01E0
-#define ADVERTISE_DEFAULT_CAP          0x0DE0
+#define ADVERTISE_DEFAULT_CAP \
+       (ADVERTISE_ALL | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)
 
 /* 1000BASE-T Control Register */
-#define MII_GIGA_CR                    0x09
 #define GIGA_CR_1000T_REPEATER_DTE     0x0400  /* 1=Repeater/switch device port 0=DTE device */
 
 #define GIGA_CR_1000T_MS_VALUE         0x0800  /* 1=Configure PHY as Master 0=Configure PHY as Slave */
index 6943a6c..1209297 100644 (file)
@@ -95,18 +95,18 @@ static int atl1e_set_settings(struct net_device *netdev,
                ecmd->advertising = hw->autoneg_advertised |
                                    ADVERTISED_TP | ADVERTISED_Autoneg;
 
-               adv4 = hw->mii_autoneg_adv_reg & ~MII_AR_SPEED_MASK;
+               adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL;
                adv9 = hw->mii_1000t_ctrl_reg & ~MII_AT001_CR_1000T_SPEED_MASK;
                if (hw->autoneg_advertised & ADVERTISE_10_HALF)
-                       adv4 |= MII_AR_10T_HD_CAPS;
+                       adv4 |= ADVERTISE_10HALF;
                if (hw->autoneg_advertised & ADVERTISE_10_FULL)
-                       adv4 |= MII_AR_10T_FD_CAPS;
+                       adv4 |= ADVERTISE_10FULL;
                if (hw->autoneg_advertised & ADVERTISE_100_HALF)
-                       adv4 |= MII_AR_100TX_HD_CAPS;
+                       adv4 |= ADVERTISE_100HALF;
                if (hw->autoneg_advertised & ADVERTISE_100_FULL)
-                       adv4 |= MII_AR_100TX_FD_CAPS;
+                       adv4 |= ADVERTISE_100FULL;
                if (hw->autoneg_advertised & ADVERTISE_1000_FULL)
-                       adv9 |= MII_AT001_CR_1000T_FD_CAPS;
+                       adv9 |= ADVERTISE_1000FULL;
 
                if (adv4 != hw->mii_autoneg_adv_reg ||
                                adv9 != hw->mii_1000t_ctrl_reg) {
index 76cc043..923063d 100644 (file)
@@ -318,7 +318,7 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw)
         * Advertisement Register (Address 4) and the 1000 mb speed bits in
         * the  1000Base-T control Register (Address 9).
         */
-       mii_autoneg_adv_reg &= ~MII_AR_SPEED_MASK;
+       mii_autoneg_adv_reg &= ~ADVERTISE_ALL;
        mii_1000t_ctrl_reg  &= ~MII_AT001_CR_1000T_SPEED_MASK;
 
        /*
@@ -327,44 +327,37 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw)
         */
        switch (hw->media_type) {
        case MEDIA_TYPE_AUTO_SENSOR:
-               mii_autoneg_adv_reg |= (MII_AR_10T_HD_CAPS   |
-                                       MII_AR_10T_FD_CAPS   |
-                                       MII_AR_100TX_HD_CAPS |
-                                       MII_AR_100TX_FD_CAPS);
-               hw->autoneg_advertised = ADVERTISE_10_HALF  |
-                                        ADVERTISE_10_FULL  |
-                                        ADVERTISE_100_HALF |
-                                        ADVERTISE_100_FULL;
+               mii_autoneg_adv_reg |= ADVERTISE_ALL;
+               hw->autoneg_advertised = ADVERTISE_ALL;
                if (hw->nic_type == athr_l1e) {
-                       mii_1000t_ctrl_reg |=
-                               MII_AT001_CR_1000T_FD_CAPS;
+                       mii_1000t_ctrl_reg |= ADVERTISE_1000FULL;
                        hw->autoneg_advertised |= ADVERTISE_1000_FULL;
                }
                break;
 
        case MEDIA_TYPE_100M_FULL:
-               mii_autoneg_adv_reg   |= MII_AR_100TX_FD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_100FULL;
                hw->autoneg_advertised = ADVERTISE_100_FULL;
                break;
 
        case MEDIA_TYPE_100M_HALF:
-               mii_autoneg_adv_reg   |= MII_AR_100TX_HD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_100_HALF;
                hw->autoneg_advertised = ADVERTISE_100_HALF;
                break;
 
        case MEDIA_TYPE_10M_FULL:
-               mii_autoneg_adv_reg   |= MII_AR_10T_FD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_10_FULL;
                hw->autoneg_advertised = ADVERTISE_10_FULL;
                break;
 
        default:
-               mii_autoneg_adv_reg   |= MII_AR_10T_HD_CAPS;
+               mii_autoneg_adv_reg   |= ADVERTISE_10_HALF;
                hw->autoneg_advertised = ADVERTISE_10_HALF;
                break;
        }
 
        /* flow control fixed to enable all */
-       mii_autoneg_adv_reg |= (MII_AR_ASM_DIR | MII_AR_PAUSE);
+       mii_autoneg_adv_reg |= (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP);
 
        hw->mii_autoneg_adv_reg = mii_autoneg_adv_reg;
        hw->mii_1000t_ctrl_reg  = mii_1000t_ctrl_reg;
@@ -374,7 +367,7 @@ static int atl1e_phy_setup_autoneg_adv(struct atl1e_hw *hw)
                return ret_val;
 
        if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) {
-               ret_val = atl1e_write_phy_reg(hw, MII_AT001_CR,
+               ret_val = atl1e_write_phy_reg(hw, MII_CTRL1000,
                                           mii_1000t_ctrl_reg);
                if (ret_val)
                        return ret_val;
@@ -397,7 +390,7 @@ int atl1e_phy_commit(struct atl1e_hw *hw)
        int ret_val;
        u16 phy_data;
 
-       phy_data = MII_CR_RESET | MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG;
+       phy_data = BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART;
 
        ret_val = atl1e_write_phy_reg(hw, MII_BMCR, phy_data);
        if (ret_val) {
@@ -645,15 +638,14 @@ int atl1e_restart_autoneg(struct atl1e_hw *hw)
                return err;
 
        if (hw->nic_type == athr_l1e || hw->nic_type == athr_l2e_revA) {
-               err = atl1e_write_phy_reg(hw, MII_AT001_CR,
+               err = atl1e_write_phy_reg(hw, MII_CTRL1000,
                                       hw->mii_1000t_ctrl_reg);
                if (err)
                        return err;
        }
 
        err = atl1e_write_phy_reg(hw, MII_BMCR,
-                       MII_CR_RESET | MII_CR_AUTO_NEG_EN |
-                       MII_CR_RESTART_AUTO_NEG);
+                       BMCR_RESET | BMCR_ANENABLE | BMCR_ANRESTART);
        return err;
 }
 
index 5ea2f4d..74df16a 100644 (file)
@@ -629,127 +629,24 @@ s32 atl1e_restart_autoneg(struct atl1e_hw *hw);
 
 /***************************** MII definition ***************************************/
 /* PHY Common Register */
-#define MII_BMCR                        0x00
-#define MII_BMSR                        0x01
-#define MII_PHYSID1                     0x02
-#define MII_PHYSID2                     0x03
-#define MII_ADVERTISE                   0x04
-#define MII_LPA                         0x05
-#define MII_EXPANSION                   0x06
-#define MII_AT001_CR                    0x09
-#define MII_AT001_SR                    0x0A
-#define MII_AT001_ESR                   0x0F
 #define MII_AT001_PSCR                  0x10
 #define MII_AT001_PSSR                  0x11
 #define MII_INT_CTRL                    0x12
 #define MII_INT_STATUS                  0x13
 #define MII_SMARTSPEED                  0x14
-#define MII_RERRCOUNTER                 0x15
-#define MII_SREVISION                   0x16
-#define MII_RESV1                       0x17
 #define MII_LBRERROR                    0x18
-#define MII_PHYADDR                     0x19
 #define MII_RESV2                       0x1a
-#define MII_TPISTATUS                   0x1b
-#define MII_NCONFIG                     0x1c
 
 #define MII_DBG_ADDR                   0x1D
 #define MII_DBG_DATA                   0x1E
 
-
-/* PHY Control Register */
-#define MII_CR_SPEED_SELECT_MSB                  0x0040  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_COLL_TEST_ENABLE                  0x0080  /* Collision test enable */
-#define MII_CR_FULL_DUPLEX                       0x0100  /* FDX =1, half duplex =0 */
-#define MII_CR_RESTART_AUTO_NEG                  0x0200  /* Restart auto negotiation */
-#define MII_CR_ISOLATE                           0x0400  /* Isolate PHY from MII */
-#define MII_CR_POWER_DOWN                        0x0800  /* Power down */
-#define MII_CR_AUTO_NEG_EN                       0x1000  /* Auto Neg Enable */
-#define MII_CR_SPEED_SELECT_LSB                  0x2000  /* bits 6,13: 10=1000, 01=100, 00=10 */
-#define MII_CR_LOOPBACK                          0x4000  /* 0 = normal, 1 = loopback */
-#define MII_CR_RESET                             0x8000  /* 0 = normal, 1 = PHY reset */
-#define MII_CR_SPEED_MASK                        0x2040
-#define MII_CR_SPEED_1000                        0x0040
-#define MII_CR_SPEED_100                         0x2000
-#define MII_CR_SPEED_10                          0x0000
-
-
-/* PHY Status Register */
-#define MII_SR_EXTENDED_CAPS                     0x0001  /* Extended register capabilities */
-#define MII_SR_JABBER_DETECT                     0x0002  /* Jabber Detected */
-#define MII_SR_LINK_STATUS                       0x0004  /* Link Status 1 = link */
-#define MII_SR_AUTONEG_CAPS                      0x0008  /* Auto Neg Capable */
-#define MII_SR_REMOTE_FAULT                      0x0010  /* Remote Fault Detect */
-#define MII_SR_AUTONEG_COMPLETE                  0x0020  /* Auto Neg Complete */
-#define MII_SR_PREAMBLE_SUPPRESS                 0x0040  /* Preamble may be suppressed */
-#define MII_SR_EXTENDED_STATUS                   0x0100  /* Ext. status info in Reg 0x0F */
-#define MII_SR_100T2_HD_CAPS                     0x0200  /* 100T2 Half Duplex Capable */
-#define MII_SR_100T2_FD_CAPS                     0x0400  /* 100T2 Full Duplex Capable */
-#define MII_SR_10T_HD_CAPS                       0x0800  /* 10T   Half Duplex Capable */
-#define MII_SR_10T_FD_CAPS                       0x1000  /* 10T   Full Duplex Capable */
-#define MII_SR_100X_HD_CAPS                      0x2000  /* 100X  Half Duplex Capable */
-#define MII_SR_100X_FD_CAPS                      0x4000  /* 100X  Full Duplex Capable */
-#define MII_SR_100T4_CAPS                        0x8000  /* 100T4 Capable */
-
-/* Link partner ability register. */
-#define MII_LPA_SLCT                             0x001f  /* Same as advertise selector  */
-#define MII_LPA_10HALF                           0x0020  /* Can do 10mbps half-duplex   */
-#define MII_LPA_10FULL                           0x0040  /* Can do 10mbps full-duplex   */
-#define MII_LPA_100HALF                          0x0080  /* Can do 100mbps half-duplex  */
-#define MII_LPA_100FULL                          0x0100  /* Can do 100mbps full-duplex  */
-#define MII_LPA_100BASE4                         0x0200  /* 100BASE-T4  */
-#define MII_LPA_PAUSE                            0x0400  /* PAUSE */
-#define MII_LPA_ASYPAUSE                         0x0800  /* Asymmetrical PAUSE */
-#define MII_LPA_RFAULT                           0x2000  /* Link partner faulted        */
-#define MII_LPA_LPACK                            0x4000  /* Link partner acked us       */
-#define MII_LPA_NPAGE                            0x8000  /* Next page bit               */
-
 /* Autoneg Advertisement Register */
-#define MII_AR_SELECTOR_FIELD                   0x0001  /* indicates IEEE 802.3 CSMA/CD */
-#define MII_AR_10T_HD_CAPS                      0x0020  /* 10T   Half Duplex Capable */
-#define MII_AR_10T_FD_CAPS                      0x0040  /* 10T   Full Duplex Capable */
-#define MII_AR_100TX_HD_CAPS                    0x0080  /* 100TX Half Duplex Capable */
-#define MII_AR_100TX_FD_CAPS                    0x0100  /* 100TX Full Duplex Capable */
-#define MII_AR_100T4_CAPS                       0x0200  /* 100T4 Capable */
-#define MII_AR_PAUSE                            0x0400  /* Pause operation desired */
-#define MII_AR_ASM_DIR                          0x0800  /* Asymmetric Pause Direction bit */
-#define MII_AR_REMOTE_FAULT                     0x2000  /* Remote Fault detected */
-#define MII_AR_NEXT_PAGE                        0x8000  /* Next Page ability supported */
-#define MII_AR_SPEED_MASK                       0x01E0
-#define MII_AR_DEFAULT_CAP_MASK                 0x0DE0
+#define MII_AR_DEFAULT_CAP_MASK                 0
 
 /* 1000BASE-T Control Register */
-#define MII_AT001_CR_1000T_HD_CAPS              0x0100  /* Advertise 1000T HD capability */
-#define MII_AT001_CR_1000T_FD_CAPS              0x0200  /* Advertise 1000T FD capability  */
-#define MII_AT001_CR_1000T_REPEATER_DTE         0x0400  /* 1=Repeater/switch device port */
-/* 0=DTE device */
-#define MII_AT001_CR_1000T_MS_VALUE             0x0800  /* 1=Configure PHY as Master */
-/* 0=Configure PHY as Slave */
-#define MII_AT001_CR_1000T_MS_ENABLE            0x1000  /* 1=Master/Slave manual config value */
-/* 0=Automatic Master/Slave config */
-#define MII_AT001_CR_1000T_TEST_MODE_NORMAL     0x0000  /* Normal Operation */
-#define MII_AT001_CR_1000T_TEST_MODE_1          0x2000  /* Transmit Waveform test */
-#define MII_AT001_CR_1000T_TEST_MODE_2          0x4000  /* Master Transmit Jitter test */
-#define MII_AT001_CR_1000T_TEST_MODE_3          0x6000  /* Slave Transmit Jitter test */
-#define MII_AT001_CR_1000T_TEST_MODE_4          0x8000  /* Transmitter Distortion test */
-#define MII_AT001_CR_1000T_SPEED_MASK           0x0300
-#define MII_AT001_CR_1000T_DEFAULT_CAP_MASK     0x0300
-
-/* 1000BASE-T Status Register */
-#define MII_AT001_SR_1000T_LP_HD_CAPS           0x0400  /* LP is 1000T HD capable */
-#define MII_AT001_SR_1000T_LP_FD_CAPS           0x0800  /* LP is 1000T FD capable */
-#define MII_AT001_SR_1000T_REMOTE_RX_STATUS     0x1000  /* Remote receiver OK */
-#define MII_AT001_SR_1000T_LOCAL_RX_STATUS      0x2000  /* Local receiver OK */
-#define MII_AT001_SR_1000T_MS_CONFIG_RES        0x4000  /* 1=Local TX is Master, 0=Slave */
-#define MII_AT001_SR_1000T_MS_CONFIG_FAULT      0x8000  /* Master/Slave config fault */
-#define MII_AT001_SR_1000T_REMOTE_RX_STATUS_SHIFT   12
-#define MII_AT001_SR_1000T_LOCAL_RX_STATUS_SHIFT    13
-
-/* Extended Status Register */
-#define MII_AT001_ESR_1000T_HD_CAPS             0x1000  /* 1000T HD capable */
-#define MII_AT001_ESR_1000T_FD_CAPS             0x2000  /* 1000T FD capable */
-#define MII_AT001_ESR_1000X_HD_CAPS             0x4000  /* 1000X HD capable */
-#define MII_AT001_ESR_1000X_FD_CAPS             0x8000  /* 1000X FD capable */
+#define MII_AT001_CR_1000T_SPEED_MASK \
+       (ADVERTISE_1000FULL | ADVERTISE_1000HALF)
+#define MII_AT001_CR_1000T_DEFAULT_CAP_MASK    MII_AT001_CR_1000T_SPEED_MASK
 
 /* AT001 PHY Specific Control Register */
 #define MII_AT001_PSCR_JABBER_DISABLE           0x0001  /* 1=Jabber Function disabled */
index e28f8ba..bf7500c 100644 (file)
@@ -2051,9 +2051,9 @@ static int atl1e_suspend(struct pci_dev *pdev, pm_message_t state)
                atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
                atl1e_read_phy_reg(hw, MII_BMSR, (u16 *)&mii_bmsr_data);
 
-               mii_advertise_data = MII_AR_10T_HD_CAPS;
+               mii_advertise_data = ADVERTISE_10HALF;
 
-               if ((atl1e_write_phy_reg(hw, MII_AT001_CR, 0) != 0) ||
+               if ((atl1e_write_phy_reg(hw, MII_CTRL1000, 0) != 0) ||
                    (atl1e_write_phy_reg(hw,
                           MII_ADVERTISE, mii_advertise_data) != 0) ||
                    (atl1e_phy_commit(hw)) != 0) {
index e610e13..00bf595 100644 (file)
@@ -364,6 +364,7 @@ struct e1000_adapter {
        /* structs defined in e1000_hw.h */
        struct e1000_hw hw;
 
+       spinlock_t stats64_lock;
        struct e1000_hw_stats stats;
        struct e1000_phy_info phy_info;
        struct e1000_phy_stats phy_stats;
@@ -494,7 +495,9 @@ extern int e1000e_setup_rx_resources(struct e1000_adapter *adapter);
 extern int e1000e_setup_tx_resources(struct e1000_adapter *adapter);
 extern void e1000e_free_rx_resources(struct e1000_adapter *adapter);
 extern void e1000e_free_tx_resources(struct e1000_adapter *adapter);
-extern void e1000e_update_stats(struct e1000_adapter *adapter);
+extern struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev,
+                                                    struct rtnl_link_stats64
+                                                    *stats);
 extern void e1000e_set_interrupt_capability(struct e1000_adapter *adapter);
 extern void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter);
 extern void e1000e_get_hw_control(struct e1000_adapter *adapter);
index fa08b63..daa7fe4 100644 (file)
@@ -46,15 +46,15 @@ struct e1000_stats {
 };
 
 #define E1000_STAT(str, m) { \
-                       .stat_string = str, \
-                       .type = E1000_STATS, \
-                       .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
-                       .stat_offset = offsetof(struct e1000_adapter, m) }
+               .stat_string = str, \
+               .type = E1000_STATS, \
+               .sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
+               .stat_offset = offsetof(struct e1000_adapter, m) }
 #define E1000_NETDEV_STAT(str, m) { \
-                       .stat_string = str, \
-                       .type = NETDEV_STATS, \
-                       .sizeof_stat = sizeof(((struct net_device *)0)->m), \
-                       .stat_offset = offsetof(struct net_device, m) }
+               .stat_string = str, \
+               .type = NETDEV_STATS, \
+               .sizeof_stat = sizeof(((struct rtnl_link_stats64 *)0)->m), \
+               .stat_offset = offsetof(struct rtnl_link_stats64, m) }
 
 static const struct e1000_stats e1000_gstrings_stats[] = {
        E1000_STAT("rx_packets", stats.gprc),
@@ -65,21 +65,21 @@ static const struct e1000_stats e1000_gstrings_stats[] = {
        E1000_STAT("tx_broadcast", stats.bptc),
        E1000_STAT("rx_multicast", stats.mprc),
        E1000_STAT("tx_multicast", stats.mptc),
-       E1000_NETDEV_STAT("rx_errors", stats.rx_errors),
-       E1000_NETDEV_STAT("tx_errors", stats.tx_errors),
-       E1000_NETDEV_STAT("tx_dropped", stats.tx_dropped),
+       E1000_NETDEV_STAT("rx_errors", rx_errors),
+       E1000_NETDEV_STAT("tx_errors", tx_errors),
+       E1000_NETDEV_STAT("tx_dropped", tx_dropped),
        E1000_STAT("multicast", stats.mprc),
        E1000_STAT("collisions", stats.colc),
-       E1000_NETDEV_STAT("rx_length_errors", stats.rx_length_errors),
-       E1000_NETDEV_STAT("rx_over_errors", stats.rx_over_errors),
+       E1000_NETDEV_STAT("rx_length_errors", rx_length_errors),
+       E1000_NETDEV_STAT("rx_over_errors", rx_over_errors),
        E1000_STAT("rx_crc_errors", stats.crcerrs),
-       E1000_NETDEV_STAT("rx_frame_errors", stats.rx_frame_errors),
+       E1000_NETDEV_STAT("rx_frame_errors", rx_frame_errors),
        E1000_STAT("rx_no_buffer_count", stats.rnbc),
        E1000_STAT("rx_missed_errors", stats.mpc),
        E1000_STAT("tx_aborted_errors", stats.ecol),
        E1000_STAT("tx_carrier_errors", stats.tncrs),
-       E1000_NETDEV_STAT("tx_fifo_errors", stats.tx_fifo_errors),
-       E1000_NETDEV_STAT("tx_heartbeat_errors", stats.tx_heartbeat_errors),
+       E1000_NETDEV_STAT("tx_fifo_errors", tx_fifo_errors),
+       E1000_NETDEV_STAT("tx_heartbeat_errors", tx_heartbeat_errors),
        E1000_STAT("tx_window_errors", stats.latecol),
        E1000_STAT("tx_abort_late_coll", stats.latecol),
        E1000_STAT("tx_deferred_ok", stats.dc),
@@ -684,20 +684,13 @@ static int e1000_set_ringparam(struct net_device *netdev,
        rx_old = adapter->rx_ring;
 
        err = -ENOMEM;
-       tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
+       tx_ring = kmemdup(tx_old, sizeof(struct e1000_ring), GFP_KERNEL);
        if (!tx_ring)
                goto err_alloc_tx;
-       /*
-        * use a memcpy to save any previously configured
-        * items like napi structs from having to be
-        * reinitialized
-        */
-       memcpy(tx_ring, tx_old, sizeof(struct e1000_ring));
 
-       rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
+       rx_ring = kmemdup(rx_old, sizeof(struct e1000_ring), GFP_KERNEL);
        if (!rx_ring)
                goto err_alloc_rx;
-       memcpy(rx_ring, rx_old, sizeof(struct e1000_ring));
 
        adapter->tx_ring = tx_ring;
        adapter->rx_ring = rx_ring;
@@ -1255,7 +1248,6 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_reg = 0;
-       u32 stat_reg = 0;
        u16 phy_reg = 0;
        s32 ret_val = 0;
 
@@ -1363,8 +1355,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
                 * Set the ILOS bit on the fiber Nic if half duplex link is
                 * detected.
                 */
-               stat_reg = er32(STATUS);
-               if ((stat_reg & E1000_STATUS_FD) == 0)
+               if ((er32(STATUS) & E1000_STATUS_FD) == 0)
                        ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU);
        }
 
@@ -1982,14 +1973,15 @@ static void e1000_get_ethtool_stats(struct net_device *netdev,
                                    u64 *data)
 {
        struct e1000_adapter *adapter = netdev_priv(netdev);
+       struct rtnl_link_stats64 net_stats;
        int i;
        char *p = NULL;
 
-       e1000e_update_stats(adapter);
+       e1000e_get_stats64(netdev, &net_stats);
        for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
                switch (e1000_gstrings_stats[i].type) {
                case NETDEV_STATS:
-                       p = (char *) netdev +
+                       p = (char *) &net_stats +
                                        e1000_gstrings_stats[i].stat_offset;
                        break;
                case E1000_STATS:
index fb46974..232b42b 100644 (file)
@@ -2104,7 +2104,6 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
 {
        union ich8_hws_flash_status hsfsts;
        s32 ret_val = -E1000_ERR_NVM;
-       s32 i = 0;
 
        hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
 
@@ -2140,6 +2139,8 @@ static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
                ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval);
                ret_val = 0;
        } else {
+               s32 i = 0;
+
                /*
                 * Otherwise poll for sometime so the current
                 * cycle has a chance to end before giving up.
index 68aa174..96921de 100644 (file)
@@ -1978,15 +1978,15 @@ static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
 {
        struct e1000_nvm_info *nvm = &hw->nvm;
        u32 eecd = er32(EECD);
-       u16 timeout = 0;
        u8 spi_stat_reg;
 
        if (nvm->type == e1000_nvm_eeprom_spi) {
+               u16 timeout = NVM_MAX_RETRY_SPI;
+
                /* Clear SK and CS */
                eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
                ew32(EECD, eecd);
                udelay(1);
-               timeout = NVM_MAX_RETRY_SPI;
 
                /*
                 * Read "Status Register" repeatedly until the LSB is cleared.
index 1c18f26..5b916b0 100644 (file)
@@ -900,8 +900,6 @@ next_desc:
 
        adapter->total_rx_bytes += total_rx_bytes;
        adapter->total_rx_packets += total_rx_packets;
-       netdev->stats.rx_bytes += total_rx_bytes;
-       netdev->stats.rx_packets += total_rx_packets;
        return cleaned;
 }
 
@@ -1057,8 +1055,6 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
        }
        adapter->total_tx_bytes += total_tx_bytes;
        adapter->total_tx_packets += total_tx_packets;
-       netdev->stats.tx_bytes += total_tx_bytes;
-       netdev->stats.tx_packets += total_tx_packets;
        return count < tx_ring->count;
 }
 
@@ -1245,8 +1241,6 @@ next_desc:
 
        adapter->total_rx_bytes += total_rx_bytes;
        adapter->total_rx_packets += total_rx_packets;
-       netdev->stats.rx_bytes += total_rx_bytes;
-       netdev->stats.rx_packets += total_rx_packets;
        return cleaned;
 }
 
@@ -1426,8 +1420,6 @@ next_desc:
 
        adapter->total_rx_bytes += total_rx_bytes;
        adapter->total_rx_packets += total_rx_packets;
-       netdev->stats.rx_bytes += total_rx_bytes;
-       netdev->stats.rx_packets += total_rx_packets;
        return cleaned;
 }
 
@@ -2728,7 +2720,6 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl, rfctl;
-       u32 psrctl = 0;
        u32 pages = 0;
 
        /* Workaround Si errata on 82579 - configure jumbo frame flow */
@@ -2827,6 +2818,8 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
                adapter->rx_ps_pages = 0;
 
        if (adapter->rx_ps_pages) {
+               u32 psrctl = 0;
+
                /* Configure extra packet-split registers */
                rfctl = er32(RFCTL);
                rfctl |= E1000_RFCTL_EXTEN;
@@ -3028,7 +3021,6 @@ static void e1000_set_multi(struct net_device *netdev)
        struct netdev_hw_addr *ha;
        u8  *mta_list;
        u32 rctl;
-       int i;
 
        /* Check for Promiscuous and All Multicast modes */
 
@@ -3051,12 +3043,13 @@ static void e1000_set_multi(struct net_device *netdev)
        ew32(RCTL, rctl);
 
        if (!netdev_mc_empty(netdev)) {
+               int i = 0;
+
                mta_list = kmalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
                if (!mta_list)
                        return;
 
                /* prepare a packed array of only addresses. */
-               i = 0;
                netdev_for_each_mc_addr(ha, netdev)
                        memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
@@ -3338,6 +3331,8 @@ int e1000e_up(struct e1000_adapter *adapter)
        return 0;
 }
 
+static void e1000e_update_stats(struct e1000_adapter *adapter);
+
 void e1000e_down(struct e1000_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
@@ -3372,6 +3367,11 @@ void e1000e_down(struct e1000_adapter *adapter)
        del_timer_sync(&adapter->phy_info_timer);
 
        netif_carrier_off(netdev);
+
+       spin_lock(&adapter->stats64_lock);
+       e1000e_update_stats(adapter);
+       spin_unlock(&adapter->stats64_lock);
+
        adapter->link_speed = 0;
        adapter->link_duplex = 0;
 
@@ -3413,6 +3413,8 @@ static int __devinit e1000_sw_init(struct e1000_adapter *adapter)
        adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
+       spin_lock_init(&adapter->stats64_lock);
+
        e1000e_set_interrupt_capability(adapter);
 
        if (e1000_alloc_queues(adapter))
@@ -3886,7 +3888,7 @@ release:
  * e1000e_update_stats - Update the board statistics counters
  * @adapter: board private structure
  **/
-void e1000e_update_stats(struct e1000_adapter *adapter)
+static void e1000e_update_stats(struct e1000_adapter *adapter)
 {
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
@@ -3998,10 +4000,11 @@ static void e1000_phy_read_status(struct e1000_adapter *adapter)
 {
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_phy_regs *phy = &adapter->phy_regs;
-       int ret_val;
 
        if ((er32(STATUS) & E1000_STATUS_LU) &&
            (adapter->hw.phy.media_type == e1000_media_type_copper)) {
+               int ret_val;
+
                ret_val  = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr);
                ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr);
                ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise);
@@ -4147,7 +4150,6 @@ static void e1000_watchdog_task(struct work_struct *work)
        struct e1000_ring *tx_ring = adapter->tx_ring;
        struct e1000_hw *hw = &adapter->hw;
        u32 link, tctl;
-       int tx_pending = 0;
 
        link = e1000e_has_link(adapter);
        if ((netif_carrier_ok(netdev)) && link) {
@@ -4285,7 +4287,9 @@ static void e1000_watchdog_task(struct work_struct *work)
        }
 
 link_up:
+       spin_lock(&adapter->stats64_lock);
        e1000e_update_stats(adapter);
+       spin_unlock(&adapter->stats64_lock);
 
        mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
        adapter->tpt_old = adapter->stats.tpt;
@@ -4299,21 +4303,18 @@ link_up:
 
        e1000e_update_adaptive(&adapter->hw);
 
-       if (!netif_carrier_ok(netdev)) {
-               tx_pending = (e1000_desc_unused(tx_ring) + 1 <
-                              tx_ring->count);
-               if (tx_pending) {
-                       /*
-                        * We've lost link, so the controller stops DMA,
-                        * but we've got queued Tx work that's never going
-                        * to get done, so reset controller to flush Tx.
-                        * (Do the reset outside of interrupt context).
-                        */
-                       adapter->tx_timeout_count++;
-                       schedule_work(&adapter->reset_task);
-                       /* return immediately since reset is imminent */
-                       return;
-               }
+       if (!netif_carrier_ok(netdev) &&
+           (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) {
+               /*
+                * We've lost link, so the controller stops DMA,
+                * but we've got queued Tx work that's never going
+                * to get done, so reset controller to flush Tx.
+                * (Do the reset outside of interrupt context).
+                */
+               adapter->tx_timeout_count++;
+               schedule_work(&adapter->reset_task);
+               /* return immediately since reset is imminent */
+               return;
        }
 
        /* Simple mode for Interrupt Throttle Rate (ITR) */
@@ -4384,13 +4385,13 @@ static int e1000_tso(struct e1000_adapter *adapter,
        u32 cmd_length = 0;
        u16 ipcse = 0, tucse, mss;
        u8 ipcss, ipcso, tucss, tucso, hdr_len;
-       int err;
 
        if (!skb_is_gso(skb))
                return 0;
 
        if (skb_header_cloned(skb)) {
-               err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+               int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+
                if (err)
                        return err;
        }
@@ -4897,16 +4898,55 @@ static void e1000_reset_task(struct work_struct *work)
 }
 
 /**
- * e1000_get_stats - Get System Network Statistics
+ * e1000_get_stats64 - Get System Network Statistics
  * @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
  *
  * Returns the address of the device statistics structure.
- * The statistics are actually updated from the timer callback.
  **/
-static struct net_device_stats *e1000_get_stats(struct net_device *netdev)
+struct rtnl_link_stats64 *e1000e_get_stats64(struct net_device *netdev,
+                                             struct rtnl_link_stats64 *stats)
 {
-       /* only return the current stats */
-       return &netdev->stats;
+       struct e1000_adapter *adapter = netdev_priv(netdev);
+
+       memset(stats, 0, sizeof(struct rtnl_link_stats64));
+       spin_lock(&adapter->stats64_lock);
+       e1000e_update_stats(adapter);
+       /* Fill out the OS statistics structure */
+       stats->rx_bytes = adapter->stats.gorc;
+       stats->rx_packets = adapter->stats.gprc;
+       stats->tx_bytes = adapter->stats.gotc;
+       stats->tx_packets = adapter->stats.gptc;
+       stats->multicast = adapter->stats.mprc;
+       stats->collisions = adapter->stats.colc;
+
+       /* Rx Errors */
+
+       /*
+        * RLEC on some newer hardware can be incorrect so build
+        * our own version based on RUC and ROC
+        */
+       stats->rx_errors = adapter->stats.rxerrc +
+               adapter->stats.crcerrs + adapter->stats.algnerrc +
+               adapter->stats.ruc + adapter->stats.roc +
+               adapter->stats.cexterr;
+       stats->rx_length_errors = adapter->stats.ruc +
+                                             adapter->stats.roc;
+       stats->rx_crc_errors = adapter->stats.crcerrs;
+       stats->rx_frame_errors = adapter->stats.algnerrc;
+       stats->rx_missed_errors = adapter->stats.mpc;
+
+       /* Tx Errors */
+       stats->tx_errors = adapter->stats.ecol +
+                                      adapter->stats.latecol;
+       stats->tx_aborted_errors = adapter->stats.ecol;
+       stats->tx_window_errors = adapter->stats.latecol;
+       stats->tx_carrier_errors = adapter->stats.tncrs;
+
+       /* Tx Dropped needs to be maintained elsewhere */
+
+       spin_unlock(&adapter->stats64_lock);
+       return stats;
 }
 
 /**
@@ -5476,9 +5516,10 @@ static irqreturn_t e1000_intr_msix(int irq, void *data)
 {
        struct net_device *netdev = data;
        struct e1000_adapter *adapter = netdev_priv(netdev);
-       int vector, msix_irq;
 
        if (adapter->msix_entries) {
+               int vector, msix_irq;
+
                vector = 0;
                msix_irq = adapter->msix_entries[vector].vector;
                disable_irq(msix_irq);
@@ -5675,7 +5716,7 @@ static const struct net_device_ops e1000e_netdev_ops = {
        .ndo_open               = e1000_open,
        .ndo_stop               = e1000_close,
        .ndo_start_xmit         = e1000_xmit_frame,
-       .ndo_get_stats          = e1000_get_stats,
+       .ndo_get_stats64        = e1000e_get_stats64,
        .ndo_set_multicast_list = e1000_set_multi,
        .ndo_set_mac_address    = e1000_set_mac,
        .ndo_change_mtu         = e1000_change_mtu,
index 6bea051..6ae31fc 100644 (file)
@@ -2409,9 +2409,7 @@ static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg)
 s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
 {
        s32 ret_val;
-       u32 page_select = 0;
        u32 page = offset >> IGP_PAGE_SHIFT;
-       u32 page_shift = 0;
 
        ret_val = hw->phy.ops.acquire(hw);
        if (ret_val)
@@ -2427,6 +2425,8 @@ s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
        hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
 
        if (offset > MAX_PHY_MULTI_PAGE_REG) {
+               u32 page_shift, page_select;
+
                /*
                 * Page select is register 31 for phy address 1 and 22 for
                 * phy address 2 and 3. Page select is shifted only for
@@ -2468,9 +2468,7 @@ out:
 s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
 {
        s32 ret_val;
-       u32 page_select = 0;
        u32 page = offset >> IGP_PAGE_SHIFT;
-       u32 page_shift = 0;
 
        ret_val = hw->phy.ops.acquire(hw);
        if (ret_val)
@@ -2486,6 +2484,8 @@ s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
        hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
 
        if (offset > MAX_PHY_MULTI_PAGE_REG) {
+               u32 page_shift, page_select;
+
                /*
                 * Page select is register 31 for phy address 1 and 22 for
                 * phy address 2 and 3. Page select is shifted only for
index a937f49..ca3be4f 100644 (file)
@@ -32,8 +32,8 @@
 
 #define DRV_NAME               "enic"
 #define DRV_DESCRIPTION                "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION            "1.4.1.10"
-#define DRV_COPYRIGHT          "Copyright 2008-2010 Cisco Systems, Inc"
+#define DRV_VERSION            "2.1.1.2"
+#define DRV_COPYRIGHT          "Copyright 2008-2011 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX          6
 
@@ -49,7 +49,7 @@ struct enic_msix_entry {
        void *devid;
 };
 
-#define ENIC_SET_APPLIED               (1 << 0)
+#define ENIC_PORT_REQUEST_APPLIED      (1 << 0)
 #define ENIC_SET_REQUEST               (1 << 1)
 #define ENIC_SET_NAME                  (1 << 2)
 #define ENIC_SET_INSTANCE              (1 << 3)
index a0af48c..89664c6 100644 (file)
@@ -1318,18 +1318,20 @@ static int enic_set_port_profile(struct enic *enic, u8 *mac)
                vic_provinfo_free(vp);
                if (err)
                        return err;
-
-               enic->pp.set |= ENIC_SET_APPLIED;
                break;
 
        case PORT_REQUEST_DISASSOCIATE:
-               enic->pp.set &= ~ENIC_SET_APPLIED;
                break;
 
        default:
                return -EINVAL;
        }
 
+       /* Set flag to indicate that the port assoc/disassoc
+        * request has been sent out to fw
+        */
+       enic->pp.set |= ENIC_PORT_REQUEST_APPLIED;
+
        return 0;
 }
 
@@ -1411,7 +1413,7 @@ static int enic_get_vf_port(struct net_device *netdev, int vf,
        int err, error, done;
        u16 response = PORT_PROFILE_RESPONSE_SUCCESS;
 
-       if (!(enic->pp.set & ENIC_SET_APPLIED))
+       if (!(enic->pp.set & ENIC_PORT_REQUEST_APPLIED))
                return -ENODATA;
 
        err = enic_dev_init_done(enic, &done, &error);
index 0a2368f..c1552b6 100644 (file)
@@ -129,6 +129,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw)
                break;
        case E1000_DEV_ID_82580_COPPER:
        case E1000_DEV_ID_82580_FIBER:
+       case E1000_DEV_ID_82580_QUAD_FIBER:
        case E1000_DEV_ID_82580_SERDES:
        case E1000_DEV_ID_82580_SGMII:
        case E1000_DEV_ID_82580_COPPER_DUAL:
index e2638af..281324e 100644 (file)
@@ -54,6 +54,7 @@ struct e1000_hw;
 #define E1000_DEV_ID_82580_SERDES             0x1510
 #define E1000_DEV_ID_82580_SGMII              0x1511
 #define E1000_DEV_ID_82580_COPPER_DUAL        0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER         0x1527
 #define E1000_DEV_ID_DH89XXCC_SGMII           0x0438
 #define E1000_DEV_ID_DH89XXCC_SERDES          0x043A
 #define E1000_DEV_ID_DH89XXCC_BACKPLANE       0x043C
index 58c665b..200cc32 100644 (file)
@@ -68,6 +68,7 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
+       { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
index c7a6c44..9f6d670 100644 (file)
@@ -592,8 +592,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        ppp_release(NULL, file);
                        err = 0;
                } else
-                       printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n",
-                              atomic_long_read(&file->f_count));
+                       pr_warn("PPPIOCDETACH file->f_count=%ld\n",
+                               atomic_long_read(&file->f_count));
                mutex_unlock(&ppp_mutex);
                return err;
        }
@@ -630,7 +630,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
        if (pf->kind != INTERFACE) {
                /* can't happen */
-               printk(KERN_ERR "PPP: not interface or channel??\n");
+               pr_err("PPP: not interface or channel??\n");
                return -EINVAL;
        }
 
@@ -704,7 +704,8 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                }
                vj = slhc_init(val2+1, val+1);
                if (!vj) {
-                       printk(KERN_ERR "PPP: no memory (VJ compressor)\n");
+                       netdev_err(ppp->dev,
+                                  "PPP: no memory (VJ compressor)\n");
                        err = -ENOMEM;
                        break;
                }
@@ -898,17 +899,17 @@ static int __init ppp_init(void)
 {
        int err;
 
-       printk(KERN_INFO "PPP generic driver version " PPP_VERSION "\n");
+       pr_info("PPP generic driver version " PPP_VERSION "\n");
 
        err = register_pernet_device(&ppp_net_ops);
        if (err) {
-               printk(KERN_ERR "failed to register PPP pernet device (%d)\n", err);
+               pr_err("failed to register PPP pernet device (%d)\n", err);
                goto out;
        }
 
        err = register_chrdev(PPP_MAJOR, "ppp", &ppp_device_fops);
        if (err) {
-               printk(KERN_ERR "failed to register PPP device (%d)\n", err);
+               pr_err("failed to register PPP device (%d)\n", err);
                goto out_net;
        }
 
@@ -1078,7 +1079,7 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb)
        new_skb = alloc_skb(new_skb_size, GFP_ATOMIC);
        if (!new_skb) {
                if (net_ratelimit())
-                       printk(KERN_ERR "PPP: no memory (comp pkt)\n");
+                       netdev_err(ppp->dev, "PPP: no memory (comp pkt)\n");
                return NULL;
        }
        if (ppp->dev->hard_header_len > PPP_HDRLEN)
@@ -1108,7 +1109,7 @@ pad_compress_skb(struct ppp *ppp, struct sk_buff *skb)
                 * the same number.
                 */
                if (net_ratelimit())
-                       printk(KERN_ERR "ppp: compressor dropped pkt\n");
+                       netdev_err(ppp->dev, "ppp: compressor dropped pkt\n");
                kfree_skb(skb);
                kfree_skb(new_skb);
                new_skb = NULL;
@@ -1138,7 +1139,9 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                if (ppp->pass_filter &&
                    sk_run_filter(skb, ppp->pass_filter) == 0) {
                        if (ppp->debug & 1)
-                               printk(KERN_DEBUG "PPP: outbound frame not passed\n");
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "PPP: outbound frame "
+                                             "not passed\n");
                        kfree_skb(skb);
                        return;
                }
@@ -1164,7 +1167,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
                new_skb = alloc_skb(skb->len + ppp->dev->hard_header_len - 2,
                                    GFP_ATOMIC);
                if (!new_skb) {
-                       printk(KERN_ERR "PPP: no memory (VJ comp pkt)\n");
+                       netdev_err(ppp->dev, "PPP: no memory (VJ comp pkt)\n");
                        goto drop;
                }
                skb_reserve(new_skb, ppp->dev->hard_header_len - 2);
@@ -1202,7 +1205,9 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb)
            proto != PPP_LCP && proto != PPP_CCP) {
                if (!(ppp->flags & SC_CCP_UP) && (ppp->flags & SC_MUST_COMP)) {
                        if (net_ratelimit())
-                               printk(KERN_ERR "ppp: compression required but down - pkt dropped.\n");
+                               netdev_err(ppp->dev,
+                                          "ppp: compression required but "
+                                          "down - pkt dropped.\n");
                        goto drop;
                }
                skb = pad_compress_skb(ppp, skb);
@@ -1505,7 +1510,7 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb)
  noskb:
        spin_unlock_bh(&pch->downl);
        if (ppp->debug & 1)
-               printk(KERN_ERR "PPP: no memory (fragment)\n");
+               netdev_err(ppp->dev, "PPP: no memory (fragment)\n");
        ++ppp->dev->stats.tx_errors;
        ++ppp->nxseq;
        return 1;       /* abandon the frame */
@@ -1686,7 +1691,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                        /* copy to a new sk_buff with more tailroom */
                        ns = dev_alloc_skb(skb->len + 128);
                        if (!ns) {
-                               printk(KERN_ERR"PPP: no memory (VJ decomp)\n");
+                               netdev_err(ppp->dev, "PPP: no memory "
+                                          "(VJ decomp)\n");
                                goto err;
                        }
                        skb_reserve(ns, 2);
@@ -1699,7 +1705,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
 
                len = slhc_uncompress(ppp->vj, skb->data + 2, skb->len - 2);
                if (len <= 0) {
-                       printk(KERN_DEBUG "PPP: VJ decompression error\n");
+                       netdev_printk(KERN_DEBUG, ppp->dev,
+                                     "PPP: VJ decompression error\n");
                        goto err;
                }
                len += 2;
@@ -1721,7 +1728,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                        goto err;
 
                if (slhc_remember(ppp->vj, skb->data + 2, skb->len - 2) <= 0) {
-                       printk(KERN_ERR "PPP: VJ uncompressed error\n");
+                       netdev_err(ppp->dev, "PPP: VJ uncompressed error\n");
                        goto err;
                }
                proto = PPP_IP;
@@ -1762,8 +1769,9 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb)
                        if (ppp->pass_filter &&
                            sk_run_filter(skb, ppp->pass_filter) == 0) {
                                if (ppp->debug & 1)
-                                       printk(KERN_DEBUG "PPP: inbound frame "
-                                              "not passed\n");
+                                       netdev_printk(KERN_DEBUG, ppp->dev,
+                                                     "PPP: inbound frame "
+                                                     "not passed\n");
                                kfree_skb(skb);
                                return;
                        }
@@ -1821,7 +1829,8 @@ ppp_decompress_frame(struct ppp *ppp, struct sk_buff *skb)
 
                ns = dev_alloc_skb(obuff_size);
                if (!ns) {
-                       printk(KERN_ERR "ppp_decompress_frame: no memory\n");
+                       netdev_err(ppp->dev, "ppp_decompress_frame: "
+                                  "no memory\n");
                        goto err;
                }
                /* the decompressor still expects the A/C bytes in the hdr */
@@ -1989,7 +1998,7 @@ ppp_mp_reconstruct(struct ppp *ppp)
        u32 seq = ppp->nextseq;
        u32 minseq = ppp->minseq;
        struct sk_buff_head *list = &ppp->mrq;
-       struct sk_buff *p, *next;
+       struct sk_buff *p, *tmp;
        struct sk_buff *head, *tail;
        struct sk_buff *skb = NULL;
        int lost = 0, len = 0;
@@ -1998,13 +2007,15 @@ ppp_mp_reconstruct(struct ppp *ppp)
                return NULL;
        head = list->next;
        tail = NULL;
-       for (p = head; p != (struct sk_buff *) list; p = next) {
-               next = p->next;
+       skb_queue_walk_safe(list, p, tmp) {
+       again:
                if (seq_before(PPP_MP_CB(p)->sequence, seq)) {
                        /* this can't happen, anyway ignore the skb */
-                       printk(KERN_ERR "ppp_mp_reconstruct bad seq %u < %u\n",
-                              PPP_MP_CB(p)->sequence, seq);
-                       head = next;
+                       netdev_err(ppp->dev, "ppp_mp_reconstruct bad "
+                                  "seq %u < %u\n",
+                                  PPP_MP_CB(p)->sequence, seq);
+                       __skb_unlink(p, list);
+                       kfree_skb(p);
                        continue;
                }
                if (PPP_MP_CB(p)->sequence != seq) {
@@ -2016,8 +2027,7 @@ ppp_mp_reconstruct(struct ppp *ppp)
                        lost = 1;
                        seq = seq_before(minseq, PPP_MP_CB(p)->sequence)?
                                minseq + 1: PPP_MP_CB(p)->sequence;
-                       next = p;
-                       continue;
+                       goto again;
                }
 
                /*
@@ -2042,17 +2052,9 @@ ppp_mp_reconstruct(struct ppp *ppp)
                    (PPP_MP_CB(head)->BEbits & B)) {
                        if (len > ppp->mrru + 2) {
                                ++ppp->dev->stats.rx_length_errors;
-                               printk(KERN_DEBUG "PPP: reconstructed packet"
-                                      " is too long (%d)\n", len);
-                       } else if (p == head) {
-                               /* fragment is complete packet - reuse skb */
-                               tail = p;
-                               skb = skb_get(p);
-                               break;
-                       } else if ((skb = dev_alloc_skb(len)) == NULL) {
-                               ++ppp->dev->stats.rx_missed_errors;
-                               printk(KERN_DEBUG "PPP: no memory for "
-                                      "reconstructed packet");
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "PPP: reconstructed packet"
+                                             " is too long (%d)\n", len);
                        } else {
                                tail = p;
                                break;
@@ -2065,9 +2067,17 @@ ppp_mp_reconstruct(struct ppp *ppp)
                 * and we haven't found a complete valid packet yet,
                 * we can discard up to and including this fragment.
                 */
-               if (PPP_MP_CB(p)->BEbits & E)
-                       head = next;
+               if (PPP_MP_CB(p)->BEbits & E) {
+                       struct sk_buff *tmp2;
 
+                       skb_queue_reverse_walk_from_safe(list, p, tmp2) {
+                               __skb_unlink(p, list);
+                               kfree_skb(p);
+                       }
+                       head = skb_peek(list);
+                       if (!head)
+                               break;
+               }
                ++seq;
        }
 
@@ -2077,26 +2087,37 @@ ppp_mp_reconstruct(struct ppp *ppp)
                   signal a receive error. */
                if (PPP_MP_CB(head)->sequence != ppp->nextseq) {
                        if (ppp->debug & 1)
-                               printk(KERN_DEBUG "  missed pkts %u..%u\n",
-                                      ppp->nextseq,
-                                      PPP_MP_CB(head)->sequence-1);
+                               netdev_printk(KERN_DEBUG, ppp->dev,
+                                             "  missed pkts %u..%u\n",
+                                             ppp->nextseq,
+                                             PPP_MP_CB(head)->sequence-1);
                        ++ppp->dev->stats.rx_dropped;
                        ppp_receive_error(ppp);
                }
 
-               if (head != tail)
-                       /* copy to a single skb */
-                       for (p = head; p != tail->next; p = p->next)
-                               skb_copy_bits(p, 0, skb_put(skb, p->len), p->len);
-               ppp->nextseq = PPP_MP_CB(tail)->sequence + 1;
-               head = tail->next;
-       }
+               skb = head;
+               if (head != tail) {
+                       struct sk_buff **fragpp = &skb_shinfo(skb)->frag_list;
+                       p = skb_queue_next(list, head);
+                       __skb_unlink(skb, list);
+                       skb_queue_walk_from_safe(list, p, tmp) {
+                               __skb_unlink(p, list);
+                               *fragpp = p;
+                               p->next = NULL;
+                               fragpp = &p->next;
+
+                               skb->len += p->len;
+                               skb->data_len += p->len;
+                               skb->truesize += p->len;
+
+                               if (p == tail)
+                                       break;
+                       }
+               } else {
+                       __skb_unlink(skb, list);
+               }
 
-       /* Discard all the skbuffs that we have copied the data out of
-          or that we can't use. */
-       while ((p = list->next) != head) {
-               __skb_unlink(p, list);
-               kfree_skb(p);
+               ppp->nextseq = PPP_MP_CB(tail)->sequence + 1;
        }
 
        return skb;
@@ -2617,8 +2638,8 @@ ppp_create_interface(struct net *net, int unit, int *retp)
        ret = register_netdev(dev);
        if (ret != 0) {
                unit_put(&pn->units_idr, unit);
-               printk(KERN_ERR "PPP: couldn't register device %s (%d)\n",
-                      dev->name, ret);
+               netdev_err(ppp->dev, "PPP: couldn't register device %s (%d)\n",
+                          dev->name, ret);
                goto out2;
        }
 
@@ -2690,9 +2711,9 @@ static void ppp_destroy_interface(struct ppp *ppp)
 
        if (!ppp->file.dead || ppp->n_channels) {
                /* "can't happen" */
-               printk(KERN_ERR "ppp: destroying ppp struct %p but dead=%d "
-                      "n_channels=%d !\n", ppp, ppp->file.dead,
-                      ppp->n_channels);
+               netdev_err(ppp->dev, "ppp: destroying ppp struct %p "
+                          "but dead=%d n_channels=%d !\n",
+                          ppp, ppp->file.dead, ppp->n_channels);
                return;
        }
 
@@ -2834,8 +2855,7 @@ static void ppp_destroy_channel(struct channel *pch)
 
        if (!pch->file.dead) {
                /* "can't happen" */
-               printk(KERN_ERR "ppp: destroying undead channel %p !\n",
-                      pch);
+               pr_err("ppp: destroying undead channel %p !\n", pch);
                return;
        }
        skb_queue_purge(&pch->file.xq);
@@ -2847,7 +2867,7 @@ static void __exit ppp_cleanup(void)
 {
        /* should never happen */
        if (atomic_read(&ppp_unit_count) || atomic_read(&channel_count))
-               printk(KERN_ERR "PPP: removing module but units remain!\n");
+               pr_err("PPP: removing module but units remain!\n");
        unregister_chrdev(PPP_MAJOR, "ppp");
        device_destroy(ppp_class, MKDEV(PPP_MAJOR, 0));
        class_destroy(ppp_class);
@@ -2865,7 +2885,7 @@ static int __unit_alloc(struct idr *p, void *ptr, int n)
 
 again:
        if (!idr_pre_get(p, GFP_KERNEL)) {
-               printk(KERN_ERR "PPP: No free memory for idr\n");
+               pr_err("PPP: No free memory for idr\n");
                return -ENOMEM;
        }
 
index 09cac70..0d6fec6 100644 (file)
@@ -2923,6 +2923,7 @@ static u16 wol_calc_crc(int size, u8 *pattern, u8 *mask_pattern)
 static int velocity_set_wol(struct velocity_info *vptr)
 {
        struct mac_regs __iomem *regs = vptr->mac_regs;
+       enum speed_opt spd_dpx = vptr->options.spd_dpx;
        static u8 buf[256];
        int i;
 
@@ -2968,6 +2969,12 @@ static int velocity_set_wol(struct velocity_info *vptr)
 
        writew(0x0FFF, &regs->WOLSRClr);
 
+       if (spd_dpx == SPD_DPX_1000_FULL)
+               goto mac_done;
+
+       if (spd_dpx != SPD_DPX_AUTO)
+               goto advertise_done;
+
        if (vptr->mii_status & VELOCITY_AUTONEG_ENABLE) {
                if (PHYID_GET_PHY_ID(vptr->phy_id) == PHYID_CICADA_CS8201)
                        MII_REG_BITS_ON(AUXCR_MDPPS, MII_NCONFIG, vptr->mac_regs);
@@ -2978,6 +2985,7 @@ static int velocity_set_wol(struct velocity_info *vptr)
        if (vptr->mii_status & VELOCITY_SPEED_1000)
                MII_REG_BITS_ON(BMCR_ANRESTART, MII_BMCR, vptr->mac_regs);
 
+advertise_done:
        BYTE_REG_BITS_ON(CHIPGCR_FCMODE, &regs->CHIPGCR);
 
        {
@@ -2987,6 +2995,7 @@ static int velocity_set_wol(struct velocity_info *vptr)
                writeb(GCR, &regs->CHIPGCR);
        }
 
+mac_done:
        BYTE_REG_BITS_OFF(ISR_PWEI, &regs->ISR);
        /* Turn on SWPTAG just before entering power mode */
        BYTE_REG_BITS_ON(STICKHW_SWPTAG, &regs->STICKHW);
index aa2e69b..d722753 100644 (file)
@@ -361,7 +361,7 @@ enum  velocity_owner {
 #define MAC_REG_CHIPGSR     0x9C
 #define MAC_REG_TESTCFG     0x9D
 #define MAC_REG_DEBUG       0x9E
-#define MAC_REG_CHIPGCR     0x9F
+#define MAC_REG_CHIPGCR     0x9F       /* Chip Operation and Diagnostic Control */
 #define MAC_REG_WOLCR0_SET  0xA0
 #define MAC_REG_WOLCR1_SET  0xA1
 #define MAC_REG_PWCFG_SET   0xA2
@@ -848,10 +848,10 @@ enum  velocity_owner {
  *     Bits in CHIPGCR register
  */
 
-#define CHIPGCR_FCGMII      0x80       /* enable GMII mode */
-#define CHIPGCR_FCFDX       0x40
+#define CHIPGCR_FCGMII      0x80       /* force GMII (else MII only) */
+#define CHIPGCR_FCFDX       0x40       /* force full duplex */
 #define CHIPGCR_FCRESV      0x20
-#define CHIPGCR_FCMODE      0x10
+#define CHIPGCR_FCMODE      0x10       /* enable MAC forced mode */
 #define CHIPGCR_LPSOPT      0x08
 #define CHIPGCR_TM1US       0x04
 #define CHIPGCR_TM0US       0x02
index 01c05f5..77097e3 100644 (file)
@@ -387,8 +387,8 @@ vxge_hw_vpath_eprom_img_ver_get(struct __vxge_hw_device *hldev,
                data1 = steer_ctrl = 0;
 
                status = vxge_hw_vpath_fw_api(vpath,
-                       VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
                        VXGE_HW_FW_API_GET_EPROM_REV,
+                       VXGE_HW_RTS_ACCESS_STEER_CTRL_DATA_STRUCT_SEL_FW_MEMO,
                        0, &data0, &data1, &steer_ctrl);
                if (status != VXGE_HW_OK)
                        break;
@@ -2868,6 +2868,8 @@ __vxge_hw_ring_create(struct __vxge_hw_vpath_handle *vp,
        ring->rxd_init = attr->rxd_init;
        ring->rxd_term = attr->rxd_term;
        ring->buffer_mode = config->buffer_mode;
+       ring->tim_rti_cfg1_saved = vp->vpath->tim_rti_cfg1_saved;
+       ring->tim_rti_cfg3_saved = vp->vpath->tim_rti_cfg3_saved;
        ring->rxds_limit = config->rxds_limit;
 
        ring->rxd_size = vxge_hw_ring_rxd_size_get(config->buffer_mode);
@@ -3511,6 +3513,8 @@ __vxge_hw_fifo_create(struct __vxge_hw_vpath_handle *vp,
 
        /* apply "interrupts per txdl" attribute */
        fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_UTILZ;
+       fifo->tim_tti_cfg1_saved = vpath->tim_tti_cfg1_saved;
+       fifo->tim_tti_cfg3_saved = vpath->tim_tti_cfg3_saved;
 
        if (fifo->config->intr)
                fifo->interrupt_type = VXGE_HW_FIFO_TXD_INT_TYPE_PER_LIST;
@@ -4377,6 +4381,8 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                }
 
                writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+               vpath->tim_tti_cfg1_saved = val64;
+
                val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_TX]);
 
                if (config->tti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) {
@@ -4433,6 +4439,7 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                }
 
                writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]);
+               vpath->tim_tti_cfg3_saved = val64;
        }
 
        if (config->ring.enable == VXGE_HW_RING_ENABLE) {
@@ -4481,6 +4488,8 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                }
 
                writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]);
+               vpath->tim_rti_cfg1_saved = val64;
+
                val64 = readq(&vp_reg->tim_cfg2_int_num[VXGE_HW_VPATH_INTR_RX]);
 
                if (config->rti.uec_a != VXGE_HW_USE_FLASH_DEFAULT) {
@@ -4537,6 +4546,7 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
                }
 
                writeq(val64, &vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]);
+               vpath->tim_rti_cfg3_saved = val64;
        }
 
        val64 = 0;
@@ -4555,26 +4565,6 @@ __vxge_hw_vpath_tim_configure(struct __vxge_hw_device *hldev, u32 vp_id)
        return status;
 }
 
-void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id)
-{
-       struct __vxge_hw_virtualpath *vpath;
-       struct vxge_hw_vpath_reg __iomem *vp_reg;
-       struct vxge_hw_vp_config *config;
-       u64 val64;
-
-       vpath = &hldev->virtual_paths[vp_id];
-       vp_reg = vpath->vp_reg;
-       config = vpath->vp_config;
-
-       if (config->fifo.enable == VXGE_HW_FIFO_ENABLE &&
-           config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) {
-               config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE;
-               val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
-               val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
-               writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
-       }
-}
-
 /*
  * __vxge_hw_vpath_initialize
  * This routine is the final phase of init which initializes the
index e249e28..3c53aa7 100644 (file)
@@ -682,6 +682,10 @@ struct __vxge_hw_virtualpath {
        u32                             vsport_number;
        u32                             max_kdfc_db;
        u32                             max_nofl_db;
+       u64                             tim_tti_cfg1_saved;
+       u64                             tim_tti_cfg3_saved;
+       u64                             tim_rti_cfg1_saved;
+       u64                             tim_rti_cfg3_saved;
 
        struct __vxge_hw_ring *____cacheline_aligned ringh;
        struct __vxge_hw_fifo *____cacheline_aligned fifoh;
@@ -921,6 +925,9 @@ struct __vxge_hw_ring {
        u32                                     doorbell_cnt;
        u32                                     total_db_cnt;
        u64                                     rxds_limit;
+       u32                                     rtimer;
+       u64                                     tim_rti_cfg1_saved;
+       u64                                     tim_rti_cfg3_saved;
 
        enum vxge_hw_status (*callback)(
                        struct __vxge_hw_ring *ringh,
@@ -1000,6 +1007,9 @@ struct __vxge_hw_fifo {
        u32                                     per_txdl_space;
        u32                                     vp_id;
        u32                                     tx_intr_num;
+       u32                                     rtimer;
+       u64                                     tim_tti_cfg1_saved;
+       u64                                     tim_tti_cfg3_saved;
 
        enum vxge_hw_status (*callback)(
                        struct __vxge_hw_fifo *fifo_handle,
index c81a651..e40f619 100644 (file)
@@ -371,9 +371,6 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr,
        struct vxge_hw_ring_rxd_info ext_info;
        vxge_debug_entryexit(VXGE_TRACE, "%s: %s:%d",
                ring->ndev->name, __func__, __LINE__);
-       ring->pkts_processed = 0;
-
-       vxge_hw_ring_replenish(ringh);
 
        do {
                prefetch((char *)dtr + L1_CACHE_BYTES);
@@ -1588,6 +1585,36 @@ static int vxge_reset_vpath(struct vxgedev *vdev, int vp_id)
        return ret;
 }
 
+/* Configure CI */
+static void vxge_config_ci_for_tti_rti(struct vxgedev *vdev)
+{
+       int i = 0;
+
+       /* Enable CI for RTI */
+       if (vdev->config.intr_type == MSI_X) {
+               for (i = 0; i < vdev->no_of_vpath; i++) {
+                       struct __vxge_hw_ring *hw_ring;
+
+                       hw_ring = vdev->vpaths[i].ring.handle;
+                       vxge_hw_vpath_dynamic_rti_ci_set(hw_ring);
+               }
+       }
+
+       /* Enable CI for TTI */
+       for (i = 0; i < vdev->no_of_vpath; i++) {
+               struct __vxge_hw_fifo *hw_fifo = vdev->vpaths[i].fifo.handle;
+               vxge_hw_vpath_tti_ci_set(hw_fifo);
+               /*
+                * For Inta (with or without napi), Set CI ON for only one
+                * vpath. (Have only one free running timer).
+                */
+               if ((vdev->config.intr_type == INTA) && (i == 0))
+                       break;
+       }
+
+       return;
+}
+
 static int do_vxge_reset(struct vxgedev *vdev, int event)
 {
        enum vxge_hw_status status;
@@ -1753,6 +1780,9 @@ static int do_vxge_reset(struct vxgedev *vdev, int event)
                netif_tx_wake_all_queues(vdev->ndev);
        }
 
+       /* configure CI */
+       vxge_config_ci_for_tti_rti(vdev);
+
 out:
        vxge_debug_entryexit(VXGE_TRACE,
                "%s:%d  Exiting...", __func__, __LINE__);
@@ -1793,22 +1823,29 @@ static void vxge_reset(struct work_struct *work)
  */
 static int vxge_poll_msix(struct napi_struct *napi, int budget)
 {
-       struct vxge_ring *ring =
-               container_of(napi, struct vxge_ring, napi);
+       struct vxge_ring *ring = container_of(napi, struct vxge_ring, napi);
+       int pkts_processed;
        int budget_org = budget;
-       ring->budget = budget;
 
+       ring->budget = budget;
+       ring->pkts_processed = 0;
        vxge_hw_vpath_poll_rx(ring->handle);
+       pkts_processed = ring->pkts_processed;
 
        if (ring->pkts_processed < budget_org) {
                napi_complete(napi);
+
                /* Re enable the Rx interrupts for the vpath */
                vxge_hw_channel_msix_unmask(
                                (struct __vxge_hw_channel *)ring->handle,
                                ring->rx_vector_no);
+               mmiowb();
        }
 
-       return ring->pkts_processed;
+       /* We are copying and returning the local variable, in case if after
+        * clearing the msix interrupt above, if the interrupt fires right
+        * away which can preempt this NAPI thread */
+       return pkts_processed;
 }
 
 static int vxge_poll_inta(struct napi_struct *napi, int budget)
@@ -1824,6 +1861,7 @@ static int vxge_poll_inta(struct napi_struct *napi, int budget)
        for (i = 0; i < vdev->no_of_vpath; i++) {
                ring = &vdev->vpaths[i].ring;
                ring->budget = budget;
+               ring->pkts_processed = 0;
                vxge_hw_vpath_poll_rx(ring->handle);
                pkts_processed += ring->pkts_processed;
                budget -= ring->pkts_processed;
@@ -2054,6 +2092,7 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
                                        netdev_get_tx_queue(vdev->ndev, 0);
                        vpath->fifo.indicate_max_pkts =
                                vdev->config.fifo_indicate_max_pkts;
+                       vpath->fifo.tx_vector_no = 0;
                        vpath->ring.rx_vector_no = 0;
                        vpath->ring.rx_csum = vdev->rx_csum;
                        vpath->ring.rx_hwts = vdev->rx_hwts;
@@ -2079,6 +2118,61 @@ static int vxge_open_vpaths(struct vxgedev *vdev)
        return VXGE_HW_OK;
 }
 
+/**
+ *  adaptive_coalesce_tx_interrupts - Changes the interrupt coalescing
+ *  if the interrupts are not within a range
+ *  @fifo: pointer to transmit fifo structure
+ *  Description: The function changes boundary timer and restriction timer
+ *  value depends on the traffic
+ *  Return Value: None
+ */
+static void adaptive_coalesce_tx_interrupts(struct vxge_fifo *fifo)
+{
+       fifo->interrupt_count++;
+       if (jiffies > fifo->jiffies + HZ / 100) {
+               struct __vxge_hw_fifo *hw_fifo = fifo->handle;
+
+               fifo->jiffies = jiffies;
+               if (fifo->interrupt_count > VXGE_T1A_MAX_TX_INTERRUPT_COUNT &&
+                   hw_fifo->rtimer != VXGE_TTI_RTIMER_ADAPT_VAL) {
+                       hw_fifo->rtimer = VXGE_TTI_RTIMER_ADAPT_VAL;
+                       vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo);
+               } else if (hw_fifo->rtimer != 0) {
+                       hw_fifo->rtimer = 0;
+                       vxge_hw_vpath_dynamic_tti_rtimer_set(hw_fifo);
+               }
+               fifo->interrupt_count = 0;
+       }
+}
+
+/**
+ *  adaptive_coalesce_rx_interrupts - Changes the interrupt coalescing
+ *  if the interrupts are not within a range
+ *  @ring: pointer to receive ring structure
+ *  Description: The function increases of decreases the packet counts within
+ *  the ranges of traffic utilization, if the interrupts due to this ring are
+ *  not within a fixed range.
+ *  Return Value: Nothing
+ */
+static void adaptive_coalesce_rx_interrupts(struct vxge_ring *ring)
+{
+       ring->interrupt_count++;
+       if (jiffies > ring->jiffies + HZ / 100) {
+               struct __vxge_hw_ring *hw_ring = ring->handle;
+
+               ring->jiffies = jiffies;
+               if (ring->interrupt_count > VXGE_T1A_MAX_INTERRUPT_COUNT &&
+                   hw_ring->rtimer != VXGE_RTI_RTIMER_ADAPT_VAL) {
+                       hw_ring->rtimer = VXGE_RTI_RTIMER_ADAPT_VAL;
+                       vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring);
+               } else if (hw_ring->rtimer != 0) {
+                       hw_ring->rtimer = 0;
+                       vxge_hw_vpath_dynamic_rti_rtimer_set(hw_ring);
+               }
+               ring->interrupt_count = 0;
+       }
+}
+
 /*
  *  vxge_isr_napi
  *  @irq: the irq of the device.
@@ -2139,24 +2233,39 @@ static irqreturn_t vxge_isr_napi(int irq, void *dev_id)
 
 #ifdef CONFIG_PCI_MSI
 
-static irqreturn_t
-vxge_tx_msix_handle(int irq, void *dev_id)
+static irqreturn_t vxge_tx_msix_handle(int irq, void *dev_id)
 {
        struct vxge_fifo *fifo = (struct vxge_fifo *)dev_id;
 
+       adaptive_coalesce_tx_interrupts(fifo);
+
+       vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)fifo->handle,
+                                 fifo->tx_vector_no);
+
+       vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)fifo->handle,
+                                  fifo->tx_vector_no);
+
        VXGE_COMPLETE_VPATH_TX(fifo);
 
+       vxge_hw_channel_msix_unmask((struct __vxge_hw_channel *)fifo->handle,
+                                   fifo->tx_vector_no);
+
+       mmiowb();
+
        return IRQ_HANDLED;
 }
 
-static irqreturn_t
-vxge_rx_msix_napi_handle(int irq, void *dev_id)
+static irqreturn_t vxge_rx_msix_napi_handle(int irq, void *dev_id)
 {
        struct vxge_ring *ring = (struct vxge_ring *)dev_id;
 
-       /* MSIX_IDX for Rx is 1 */
+       adaptive_coalesce_rx_interrupts(ring);
+
        vxge_hw_channel_msix_mask((struct __vxge_hw_channel *)ring->handle,
-                                       ring->rx_vector_no);
+                                 ring->rx_vector_no);
+
+       vxge_hw_channel_msix_clear((struct __vxge_hw_channel *)ring->handle,
+                                  ring->rx_vector_no);
 
        napi_schedule(&ring->napi);
        return IRQ_HANDLED;
@@ -2173,14 +2282,20 @@ vxge_alarm_msix_handle(int irq, void *dev_id)
                VXGE_HW_VPATH_MSIX_ACTIVE) + VXGE_ALARM_MSIX_ID;
 
        for (i = 0; i < vdev->no_of_vpath; i++) {
+               /* Reduce the chance of loosing alarm interrupts by masking
+                * the vector. A pending bit will be set if an alarm is
+                * generated and on unmask the interrupt will be fired.
+                */
                vxge_hw_vpath_msix_mask(vdev->vpaths[i].handle, msix_id);
+               vxge_hw_vpath_msix_clear(vdev->vpaths[i].handle, msix_id);
+               mmiowb();
 
                status = vxge_hw_vpath_alarm_process(vdev->vpaths[i].handle,
                        vdev->exec_mode);
                if (status == VXGE_HW_OK) {
-
                        vxge_hw_vpath_msix_unmask(vdev->vpaths[i].handle,
-                                       msix_id);
+                                                 msix_id);
+                       mmiowb();
                        continue;
                }
                vxge_debug_intr(VXGE_ERR,
@@ -2299,6 +2414,9 @@ static int vxge_enable_msix(struct vxgedev *vdev)
                        vpath->ring.rx_vector_no = (vpath->device_id *
                                                VXGE_HW_VPATH_MSIX_ACTIVE) + 1;
 
+                       vpath->fifo.tx_vector_no = (vpath->device_id *
+                                               VXGE_HW_VPATH_MSIX_ACTIVE);
+
                        vxge_hw_vpath_msix_set(vpath->handle, tim_msix_id,
                                               VXGE_ALARM_MSIX_ID);
                }
@@ -2474,8 +2592,9 @@ INTA_MODE:
                        "%s:vxge:INTA", vdev->ndev->name);
                vxge_hw_device_set_intr_type(vdev->devh,
                        VXGE_HW_INTR_MODE_IRQLINE);
-               vxge_hw_vpath_tti_ci_set(vdev->devh,
-                       vdev->vpaths[0].device_id);
+
+               vxge_hw_vpath_tti_ci_set(vdev->vpaths[0].fifo.handle);
+
                ret = request_irq((int) vdev->pdev->irq,
                        vxge_isr_napi,
                        IRQF_SHARED, vdev->desc[0], vdev);
@@ -2745,6 +2864,10 @@ static int vxge_open(struct net_device *dev)
        }
 
        netif_tx_start_all_queues(vdev->ndev);
+
+       /* configure CI */
+       vxge_config_ci_for_tti_rti(vdev);
+
        goto out0;
 
 out2:
@@ -3348,7 +3471,7 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
                vxge_debug_init(VXGE_ERR,
                        "%s: vpath memory allocation failed",
                        vdev->ndev->name);
-               ret = -ENODEV;
+               ret = -ENOMEM;
                goto _out1;
        }
 
@@ -3369,11 +3492,11 @@ static int __devinit vxge_device_register(struct __vxge_hw_device *hldev,
        if (vdev->config.gro_enable)
                ndev->features |= NETIF_F_GRO;
 
-       if (register_netdev(ndev)) {
+       ret = register_netdev(ndev);
+       if (ret) {
                vxge_debug_init(vxge_hw_device_trace_level_get(hldev),
                        "%s: %s : device registration failed!",
                        ndev->name, __func__);
-               ret = -ENODEV;
                goto _out2;
        }
 
@@ -3444,6 +3567,11 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev)
        /* in 2.6 will call stop() if device is up */
        unregister_netdev(dev);
 
+       kfree(vdev->vpaths);
+
+       /* we are safe to free it now */
+       free_netdev(dev);
+
        vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered",
                        buf);
        vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d  Exiting...", buf,
@@ -3799,7 +3927,7 @@ static void __devinit vxge_device_config_init(
                break;
 
        case MSI_X:
-               device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX;
+               device_config->intr_mode = VXGE_HW_INTR_MODE_MSIX_ONE_SHOT;
                break;
        }
 
@@ -4335,10 +4463,10 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
                goto _exit1;
        }
 
-       if (pci_request_region(pdev, 0, VXGE_DRIVER_NAME)) {
+       ret = pci_request_region(pdev, 0, VXGE_DRIVER_NAME);
+       if (ret) {
                vxge_debug_init(VXGE_ERR,
                        "%s : request regions failed", __func__);
-               ret = -ENODEV;
                goto _exit1;
        }
 
@@ -4446,7 +4574,7 @@ vxge_probe(struct pci_dev *pdev, const struct pci_device_id *pre)
                        if (!img[i].is_valid)
                                break;
                        vxge_debug_init(VXGE_TRACE, "%s: EPROM %d, version "
-                                       "%d.%d.%d.%d\n", VXGE_DRIVER_NAME, i,
+                                       "%d.%d.%d.%d", VXGE_DRIVER_NAME, i,
                                        VXGE_EPROM_IMG_MAJOR(img[i].version),
                                        VXGE_EPROM_IMG_MINOR(img[i].version),
                                        VXGE_EPROM_IMG_FIX(img[i].version),
@@ -4643,8 +4771,9 @@ _exit6:
 _exit5:
        vxge_device_unregister(hldev);
 _exit4:
-       pci_disable_sriov(pdev);
+       pci_set_drvdata(pdev, NULL);
        vxge_hw_device_terminate(hldev);
+       pci_disable_sriov(pdev);
 _exit3:
        iounmap(attr.bar0);
 _exit2:
@@ -4655,7 +4784,7 @@ _exit0:
        kfree(ll_config);
        kfree(device_config);
        driver_config->config_dev_cnt--;
-       pci_set_drvdata(pdev, NULL);
+       driver_config->total_dev_cnt--;
        return ret;
 }
 
@@ -4668,45 +4797,34 @@ _exit0:
 static void __devexit vxge_remove(struct pci_dev *pdev)
 {
        struct __vxge_hw_device *hldev;
-       struct vxgedev *vdev = NULL;
-       struct net_device *dev;
-       int i = 0;
+       struct vxgedev *vdev;
+       int i;
 
        hldev = pci_get_drvdata(pdev);
-
        if (hldev == NULL)
                return;
 
-       dev = hldev->ndev;
-       vdev = netdev_priv(dev);
+       vdev = netdev_priv(hldev->ndev);
 
        vxge_debug_entryexit(vdev->level_trace, "%s:%d", __func__, __LINE__);
-
        vxge_debug_init(vdev->level_trace, "%s : removing PCI device...",
                        __func__);
-       vxge_device_unregister(hldev);
 
-       for (i = 0; i < vdev->no_of_vpath; i++) {
+       for (i = 0; i < vdev->no_of_vpath; i++)
                vxge_free_mac_add_list(&vdev->vpaths[i]);
-               vdev->vpaths[i].mcast_addr_cnt = 0;
-               vdev->vpaths[i].mac_addr_cnt = 0;
-       }
-
-       kfree(vdev->vpaths);
 
+       vxge_device_unregister(hldev);
+       pci_set_drvdata(pdev, NULL);
+       /* Do not call pci_disable_sriov here, as it will break child devices */
+       vxge_hw_device_terminate(hldev);
        iounmap(vdev->bar0);
-
-       /* we are safe to free it now */
-       free_netdev(dev);
+       pci_release_region(pdev, 0);
+       pci_disable_device(pdev);
+       driver_config->config_dev_cnt--;
+       driver_config->total_dev_cnt--;
 
        vxge_debug_init(vdev->level_trace, "%s:%d Device unregistered",
                        __func__, __LINE__);
-
-       vxge_hw_device_terminate(hldev);
-
-       pci_disable_device(pdev);
-       pci_release_region(pdev, 0);
-       pci_set_drvdata(pdev, NULL);
        vxge_debug_entryexit(vdev->level_trace, "%s:%d  Exiting...", __func__,
                             __LINE__);
 }
index 5746fed..40474f0 100644 (file)
 #define VXGE_TTI_LTIMER_VAL    1000
 #define VXGE_T1A_TTI_LTIMER_VAL        80
 #define VXGE_TTI_RTIMER_VAL    0
+#define VXGE_TTI_RTIMER_ADAPT_VAL      10
 #define VXGE_T1A_TTI_RTIMER_VAL        400
 #define VXGE_RTI_BTIMER_VAL    250
 #define VXGE_RTI_LTIMER_VAL    100
 #define VXGE_RTI_RTIMER_VAL    0
-#define VXGE_FIFO_INDICATE_MAX_PKTS VXGE_DEF_FIFO_LENGTH
+#define VXGE_RTI_RTIMER_ADAPT_VAL      15
+#define VXGE_FIFO_INDICATE_MAX_PKTS    VXGE_DEF_FIFO_LENGTH
 #define VXGE_ISR_POLLING_CNT   8
 #define VXGE_MAX_CONFIG_DEV    0xFF
 #define VXGE_EXEC_MODE_DISABLE 0
 #define RTI_T1A_RX_UFC_C       50
 #define RTI_T1A_RX_UFC_D       60
 
+/*
+ * The interrupt rate is maintained at 3k per second with the moderation
+ * parameters for most traffic but not all. This is the maximum interrupt
+ * count allowed per function with INTA or per vector in the case of
+ * MSI-X in a 10 millisecond time period. Enabled only for Titan 1A.
+ */
+#define VXGE_T1A_MAX_INTERRUPT_COUNT   100
+#define VXGE_T1A_MAX_TX_INTERRUPT_COUNT        200
 
 /* Milli secs timer period */
 #define VXGE_TIMER_DELAY               10000
@@ -247,6 +257,11 @@ struct vxge_fifo {
        int tx_steering_type;
        int indicate_max_pkts;
 
+       /* Adaptive interrupt moderation parameters used in T1A */
+       unsigned long interrupt_count;
+       unsigned long jiffies;
+
+       u32 tx_vector_no;
        /* Tx stats */
        struct vxge_fifo_stats stats;
 } ____cacheline_aligned;
@@ -271,6 +286,10 @@ struct vxge_ring {
         */
        int driver_id;
 
+       /* Adaptive interrupt moderation parameters used in T1A */
+       unsigned long interrupt_count;
+       unsigned long jiffies;
+
        /* copy of the flag indicating whether rx_csum is to be used */
        u32 rx_csum:1,
            rx_hwts:1;
@@ -286,7 +305,7 @@ struct vxge_ring {
 
        int vlan_tag_strip;
        struct vlan_group *vlgrp;
-       int rx_vector_no;
+       u32 rx_vector_no;
        enum vxge_hw_status last_status;
 
        /* Rx stats */
index 4c10d6c..8674f33 100644 (file)
@@ -218,6 +218,68 @@ exit:
        return status;
 }
 
+void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo)
+{
+       struct vxge_hw_vpath_reg __iomem *vp_reg;
+       struct vxge_hw_vp_config *config;
+       u64 val64;
+
+       if (fifo->config->enable != VXGE_HW_FIFO_ENABLE)
+               return;
+
+       vp_reg = fifo->vp_reg;
+       config = container_of(fifo->config, struct vxge_hw_vp_config, fifo);
+
+       if (config->tti.timer_ci_en != VXGE_HW_TIM_TIMER_CI_ENABLE) {
+               config->tti.timer_ci_en = VXGE_HW_TIM_TIMER_CI_ENABLE;
+               val64 = readq(&vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+               val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
+               fifo->tim_tti_cfg1_saved = val64;
+               writeq(val64, &vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_TX]);
+       }
+}
+
+void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring)
+{
+       u64 val64 = ring->tim_rti_cfg1_saved;
+
+       val64 |= VXGE_HW_TIM_CFG1_INT_NUM_TIMER_CI;
+       ring->tim_rti_cfg1_saved = val64;
+       writeq(val64, &ring->vp_reg->tim_cfg1_int_num[VXGE_HW_VPATH_INTR_RX]);
+}
+
+void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo)
+{
+       u64 val64 = fifo->tim_tti_cfg3_saved;
+       u64 timer = (fifo->rtimer * 1000) / 272;
+
+       val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff);
+       if (timer)
+               val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) |
+                       VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(5);
+
+       writeq(val64, &fifo->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_TX]);
+       /* tti_cfg3_saved is not updated again because it is
+        * initialized at one place only - init time.
+        */
+}
+
+void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring)
+{
+       u64 val64 = ring->tim_rti_cfg3_saved;
+       u64 timer = (ring->rtimer * 1000) / 272;
+
+       val64 &= ~VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(0x3ffffff);
+       if (timer)
+               val64 |= VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_VAL(timer) |
+                       VXGE_HW_TIM_CFG3_INT_NUM_RTIMER_EVENT_SF(4);
+
+       writeq(val64, &ring->vp_reg->tim_cfg3_int_num[VXGE_HW_VPATH_INTR_RX]);
+       /* rti_cfg3_saved is not updated again because it is
+        * initialized at one place only - init time.
+        */
+}
+
 /**
  * vxge_hw_channel_msix_mask - Mask MSIX Vector.
  * @channeh: Channel for rx or tx handle
@@ -253,6 +315,23 @@ vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channel, int msix_id)
                &channel->common_reg->clear_msix_mask_vect[msix_id%4]);
 }
 
+/**
+ * vxge_hw_channel_msix_clear - Unmask the MSIX Vector.
+ * @channel: Channel for rx or tx handle
+ * @msix_id:  MSI ID
+ *
+ * The function unmasks the msix interrupt for the given msix_id
+ * if configured in MSIX oneshot mode
+ *
+ * Returns: 0
+ */
+void vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channel, int msix_id)
+{
+       __vxge_hw_pio_mem_write32_upper(
+               (u32) vxge_bVALn(vxge_mBIT(msix_id >> 2), 0, 32),
+               &channel->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
+}
+
 /**
  * vxge_hw_device_set_intr_type - Updates the configuration
  *             with new interrupt type.
@@ -2190,20 +2269,15 @@ vxge_hw_vpath_msix_set(struct __vxge_hw_vpath_handle *vp, int *tim_msix_id,
 
        if (vpath->hldev->config.intr_mode ==
                                        VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) {
+               __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
+                               VXGE_HW_ONE_SHOT_VECT0_EN_ONE_SHOT_VECT0_EN,
+                               0, 32), &vp_reg->one_shot_vect0_en);
                __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
                                VXGE_HW_ONE_SHOT_VECT1_EN_ONE_SHOT_VECT1_EN,
                                0, 32), &vp_reg->one_shot_vect1_en);
-       }
-
-       if (vpath->hldev->config.intr_mode ==
-               VXGE_HW_INTR_MODE_MSIX_ONE_SHOT) {
                __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
                                VXGE_HW_ONE_SHOT_VECT2_EN_ONE_SHOT_VECT2_EN,
                                0, 32), &vp_reg->one_shot_vect2_en);
-
-               __vxge_hw_pio_mem_write32_upper((u32)vxge_bVALn(
-                               VXGE_HW_ONE_SHOT_VECT3_EN_ONE_SHOT_VECT3_EN,
-                               0, 32), &vp_reg->one_shot_vect3_en);
        }
 }
 
@@ -2228,6 +2302,32 @@ vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vp, int msix_id)
                &hldev->common_reg->set_msix_mask_vect[msix_id % 4]);
 }
 
+/**
+ * vxge_hw_vpath_msix_clear - Clear MSIX Vector.
+ * @vp: Virtual Path handle.
+ * @msix_id:  MSI ID
+ *
+ * The function clears the msix interrupt for the given msix_id
+ *
+ * Returns: 0,
+ * Otherwise, VXGE_HW_ERR_WRONG_IRQ if the msix index is out of range
+ * status.
+ * See also:
+ */
+void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id)
+{
+       struct __vxge_hw_device *hldev = vp->vpath->hldev;
+
+       if ((hldev->config.intr_mode == VXGE_HW_INTR_MODE_MSIX_ONE_SHOT))
+               __vxge_hw_pio_mem_write32_upper(
+                       (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
+                       &hldev->common_reg->clr_msix_one_shot_vec[msix_id % 4]);
+       else
+               __vxge_hw_pio_mem_write32_upper(
+                       (u32) vxge_bVALn(vxge_mBIT((msix_id >> 2)), 0, 32),
+                       &hldev->common_reg->clear_msix_mask_vect[msix_id % 4]);
+}
+
 /**
  * vxge_hw_vpath_msix_unmask - Unmask the MSIX Vector.
  * @vp: Virtual Path handle.
index d48486d..9d9dfda 100644 (file)
@@ -2142,6 +2142,10 @@ void vxge_hw_device_clear_tx_rx(
  *  Virtual Paths
  */
 
+void vxge_hw_vpath_dynamic_rti_rtimer_set(struct __vxge_hw_ring *ring);
+
+void vxge_hw_vpath_dynamic_tti_rtimer_set(struct __vxge_hw_fifo *fifo);
+
 u32 vxge_hw_vpath_id(
        struct __vxge_hw_vpath_handle *vpath_handle);
 
@@ -2245,6 +2249,8 @@ void
 vxge_hw_vpath_msix_mask(struct __vxge_hw_vpath_handle *vpath_handle,
                        int msix_id);
 
+void vxge_hw_vpath_msix_clear(struct __vxge_hw_vpath_handle *vp, int msix_id);
+
 void vxge_hw_device_flush_io(struct __vxge_hw_device *devh);
 
 void
@@ -2269,6 +2275,9 @@ vxge_hw_channel_msix_mask(struct __vxge_hw_channel *channelh, int msix_id);
 void
 vxge_hw_channel_msix_unmask(struct __vxge_hw_channel *channelh, int msix_id);
 
+void
+vxge_hw_channel_msix_clear(struct __vxge_hw_channel *channelh, int msix_id);
+
 void
 vxge_hw_channel_dtr_try_complete(struct __vxge_hw_channel *channel,
                                 void **dtrh);
@@ -2282,7 +2291,8 @@ vxge_hw_channel_dtr_free(struct __vxge_hw_channel *channel, void *dtrh);
 int
 vxge_hw_channel_dtr_count(struct __vxge_hw_channel *channel);
 
-void
-vxge_hw_vpath_tti_ci_set(struct __vxge_hw_device *hldev, u32 vp_id);
+void vxge_hw_vpath_tti_ci_set(struct __vxge_hw_fifo *fifo);
+
+void vxge_hw_vpath_dynamic_rti_ci_set(struct __vxge_hw_ring *ring);
 
 #endif
index ad2f99b..581e215 100644 (file)
@@ -16,8 +16,8 @@
 
 #define VXGE_VERSION_MAJOR     "2"
 #define VXGE_VERSION_MINOR     "5"
-#define VXGE_VERSION_FIX       "1"
-#define VXGE_VERSION_BUILD     "22082"
+#define VXGE_VERSION_FIX       "2"
+#define VXGE_VERSION_BUILD     "22259"
 #define VXGE_VERSION_FOR       "k"
 
 #define VXGE_FW_VER(maj, min, bld) (((maj) << 16) + ((min) << 8) + (bld))
index 359df04..9d339eb 100644 (file)
 #define AUDIT_BPRM_FCAPS       1321    /* Information about fcaps increasing perms */
 #define AUDIT_CAPSET           1322    /* Record showing argument to sys_capset */
 #define AUDIT_MMAP             1323    /* Record showing descriptor and flags in mmap */
+#define AUDIT_NETFILTER_PKT    1324    /* Packets traversing netfilter chains */
+#define AUDIT_NETFILTER_CFG    1325    /* Netfilter chain modifications */
 
 #define AUDIT_AVC              1400    /* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR      1401    /* Internal SE Linux Errors */
index 010e2d8..d638e85 100644 (file)
@@ -279,8 +279,6 @@ enum dccp_state {
        DCCP_MAX_STATES
 };
 
-#define DCCP_STATE_MASK 0x1f
-
 enum {
        DCCPF_OPEN            = TCPF_ESTABLISHED,
        DCCPF_REQUESTING      = TCPF_SYN_SENT,
index 6485d2a..f4a2e6b 100644 (file)
@@ -135,6 +135,7 @@ enum {
        IFLA_VF_PORTS,
        IFLA_PORT_SELF,
        IFLA_AF_SPEC,
+       IFLA_GROUP,             /* Group the device belongs to */
        __IFLA_MAX
 };
 
index 5f43a3b..4deb383 100644 (file)
 #define IP_VS_CONN_F_TEMPLATE  0x1000          /* template, not connection */
 #define IP_VS_CONN_F_ONE_PACKET        0x2000          /* forward only one packet */
 
+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
+                                 IP_VS_CONN_F_NOOUTPUT | \
+                                 IP_VS_CONN_F_INACTIVE | \
+                                 IP_VS_CONN_F_SEQ_MASK | \
+                                 IP_VS_CONN_F_NO_CPORT | \
+                                 IP_VS_CONN_F_TEMPLATE \
+                                )
+
 /* Flags that are not sent to backup server start from bit 16 */
 #define IP_VS_CONN_F_NFCT      (1 << 16)       /* use netfilter conntrack */
 
index d971346..371fa88 100644 (file)
@@ -75,6 +75,9 @@ struct wireless_dev;
 #define NET_RX_SUCCESS         0       /* keep 'em coming, baby */
 #define NET_RX_DROP            1       /* packet dropped */
 
+/* Initial net device group. All devices belong to group 0 by default. */
+#define INIT_NETDEV_GROUP      0
+
 /*
  * Transmit return codes: transmit return codes originate from three different
  * namespaces:
@@ -643,6 +646,14 @@ struct xps_dev_maps {
     (nr_cpu_ids * sizeof(struct xps_map *)))
 #endif /* CONFIG_XPS */
 
+#define TC_MAX_QUEUE   16
+#define TC_BITMASK     15
+/* HW offloaded queuing disciplines txq count and offset maps */
+struct netdev_tc_txq {
+       u16 count;
+       u16 offset;
+};
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -753,6 +764,11 @@ struct xps_dev_maps {
  * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
  *                       struct nlattr *port[]);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
+ *     Called to setup 'tc' number of traffic classes in the net device. This
+ *     is always called from the stack with the rtnl lock held and netif tx
+ *     queues stopped. This allows the netdevice to perform queue management
+ *     safely.
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -811,6 +827,7 @@ struct net_device_ops {
                                                   struct nlattr *port[]);
        int                     (*ndo_get_vf_port)(struct net_device *dev,
                                                   int vf, struct sk_buff *skb);
+       int                     (*ndo_setup_tc)(struct net_device *dev, u8 tc);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
        int                     (*ndo_fcoe_enable)(struct net_device *dev);
        int                     (*ndo_fcoe_disable)(struct net_device *dev);
@@ -1143,6 +1160,9 @@ struct net_device {
        /* Data Center Bridging netlink ops */
        const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
+       u8 num_tc;
+       struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+       u8 prio_tc_map[TC_BITMASK + 1];
 
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
        /* max exchange id for FCoE LRO by ddp */
@@ -1153,11 +1173,65 @@ struct net_device {
 
        /* phy device may attach itself for hardware timestamping */
        struct phy_device *phydev;
+
+       /* group the device belongs to */
+       int group;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
 #define        NETDEV_ALIGN            32
 
+static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+       return dev->prio_tc_map[prio & TC_BITMASK];
+}
+
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+       dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+       return 0;
+}
+
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+       dev->num_tc = 0;
+       memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+       memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+       dev->tc_to_txq[tc].count = count;
+       dev->tc_to_txq[tc].offset = offset;
+       return 0;
+}
+
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+       if (num_tc > TC_MAX_QUEUE)
+               return -EINVAL;
+
+       dev->num_tc = num_tc;
+       return 0;
+}
+
+static inline
+int netdev_get_num_tc(struct net_device *dev)
+{
+       return dev->num_tc;
+}
+
 static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
                                         unsigned int index)
@@ -1844,6 +1918,7 @@ extern int                dev_set_alias(struct net_device *, const char *, size_t);
 extern int             dev_change_net_namespace(struct net_device *,
                                                 struct net *, const char *);
 extern int             dev_set_mtu(struct net_device *, int);
+extern void            dev_set_group(struct net_device *, int);
 extern int             dev_set_mac_address(struct net_device *,
                                            struct sockaddr *);
 extern int             dev_hard_start_xmit(struct sk_buff *skb,
index 1893837..eeec00a 100644 (file)
 #define NF_MAX_VERDICT NF_STOP
 
 /* we overload the higher bits for encoding auxiliary data such as the queue
- * number. Not nice, but better than additional function arguments. */
-#define NF_VERDICT_MASK 0x0000ffff
-#define NF_VERDICT_BITS 16
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS   0x00008000
 
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
 #define NF_VERDICT_QMASK 0xffff0000
 #define NF_VERDICT_QBITS 16
 
-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
 
-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
 
 /* only for userspace compatibility */
 #ifndef __KERNEL__
@@ -41,6 +45,9 @@
    <= 0x2000 is used for protocol-flags. */
 #define NFC_UNKNOWN 0x4000
 #define NFC_ALTERED 0x8000
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
 #endif
 
 enum nf_inet_hooks {
@@ -72,6 +79,10 @@ union nf_inet_addr {
 
 #ifdef __KERNEL__
 #ifdef CONFIG_NETFILTER
+static inline int NF_DROP_GETERR(int verdict)
+{
+       return -(verdict >> NF_VERDICT_QBITS);
+}
 
 static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
                                   const union nf_inet_addr *a2)
@@ -267,7 +278,7 @@ struct nf_afinfo {
        int             route_key_size;
 };
 
-extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];
+extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
 static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
 {
        return rcu_dereference(nf_afinfo[family]);
@@ -357,9 +368,9 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 #endif /*CONFIG_NETFILTER*/
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;
 extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);
-extern void (*nf_ct_destroy)(struct nf_conntrack *);
+extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;
 #else
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
 #endif
index 9d40eff..89c0d1e 100644 (file)
@@ -9,6 +9,7 @@ header-y += nfnetlink_conntrack.h
 header-y += nfnetlink_log.h
 header-y += nfnetlink_queue.h
 header-y += x_tables.h
+header-y += xt_AUDIT.h
 header-y += xt_CHECKSUM.h
 header-y += xt_CLASSIFY.h
 header-y += xt_CONNMARK.h
@@ -55,6 +56,7 @@ header-y += xt_rateest.h
 header-y += xt_realm.h
 header-y += xt_recent.h
 header-y += xt_sctp.h
+header-y += xt_socket.h
 header-y += xt_state.h
 header-y += xt_statistic.h
 header-y += xt_string.h
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h
new file mode 100644 (file)
index 0000000..064bc63
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef _NF_CONNTRACK_SNMP_H
+#define _NF_CONNTRACK_SNMP_H
+
+extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+                               unsigned int protoff,
+                               struct nf_conn *ct,
+                               enum ip_conntrack_info ctinfo);
+
+#endif /* _NF_CONNTRACK_SNMP_H */
index 19711e3..debf1ae 100644 (file)
@@ -42,6 +42,7 @@ enum ctattr_type {
        CTA_SECMARK,            /* obsolete */
        CTA_ZONE,
        CTA_SECCTX,
+       CTA_TIMESTAMP,
        __CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -127,6 +128,14 @@ enum ctattr_counters {
 };
 #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
 
+enum ctattr_tstamp {
+       CTA_TIMESTAMP_UNSPEC,
+       CTA_TIMESTAMP_START,
+       CTA_TIMESTAMP_STOP,
+       __CTA_TIMESTAMP_MAX
+};
+#define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1)
+
 enum ctattr_nat {
        CTA_NAT_UNSPEC,
        CTA_NAT_MINIP,
index 6712e71..3721952 100644 (file)
@@ -611,8 +611,9 @@ struct _compat_xt_align {
 extern void xt_compat_lock(u_int8_t af);
 extern void xt_compat_unlock(u_int8_t af);
 
-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);
+extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);
 extern void xt_compat_flush_offsets(u_int8_t af);
+extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);
 extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);
 
 extern int xt_compat_match_offset(const struct xt_match *match);
diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/linux/netfilter/xt_AUDIT.h
new file mode 100644 (file)
index 0000000..38751d2
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Header file for iptables xt_AUDIT target
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _XT_AUDIT_TARGET_H
+#define _XT_AUDIT_TARGET_H
+
+#include <linux/types.h>
+
+enum {
+       XT_AUDIT_TYPE_ACCEPT = 0,
+       XT_AUDIT_TYPE_DROP,
+       XT_AUDIT_TYPE_REJECT,
+       __XT_AUDIT_TYPE_MAX,
+};
+
+#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)
+
+struct xt_audit_info {
+       __u8 type; /* XT_AUDIT_TYPE_* */
+};
+
+#endif /* _XT_AUDIT_TARGET_H */
index 1b56410..b56e768 100644 (file)
@@ -1,14 +1,16 @@
 #ifndef _XT_CT_H
 #define _XT_CT_H
 
+#include <linux/types.h>
+
 #define XT_CT_NOTRACK  0x1
 
 struct xt_ct_target_info {
-       u_int16_t       flags;
-       u_int16_t       zone;
-       u_int32_t       ct_events;
-       u_int32_t       exp_events;
-       char            helper[16];
+       __u16 flags;
+       __u16 zone;
+       __u32 ct_events;
+       __u32 exp_events;
+       char helper[16];
 
        /* Used internally by the kernel */
        struct nf_conn  *ct __attribute__((aligned(8)));
index 2584f4a..9eafdbb 100644 (file)
@@ -20,4 +20,10 @@ struct xt_NFQ_info_v1 {
        __u16 queues_total;
 };
 
+struct xt_NFQ_info_v2 {
+       __u16 queuenum;
+       __u16 queues_total;
+       __u16 bypass;
+};
+
 #endif /* _XT_NFQ_TARGET_H */
index 2db5432..7157318 100644 (file)
@@ -1,13 +1,15 @@
 #ifndef _XT_TCPOPTSTRIP_H
 #define _XT_TCPOPTSTRIP_H
 
+#include <linux/types.h>
+
 #define tcpoptstrip_set_bit(bmap, idx) \
        (bmap[(idx) >> 5] |= 1U << (idx & 31))
 #define tcpoptstrip_test_bit(bmap, idx) \
        (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0)
 
 struct xt_tcpoptstrip_target_info {
-       u_int32_t strip_bmap[8];
+       __u32 strip_bmap[8];
 };
 
 #endif /* _XT_TCPOPTSTRIP_H */
index 3f3d693..902043c 100644 (file)
@@ -1,19 +1,21 @@
 #ifndef _XT_TPROXY_H
 #define _XT_TPROXY_H
 
+#include <linux/types.h>
+
 /* TPROXY target is capable of marking the packet to perform
  * redirection. We can get rid of that whenever we get support for
  * mutliple targets in the same rule. */
 struct xt_tproxy_target_info {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
        __be32 laddr;
        __be16 lport;
 };
 
 struct xt_tproxy_target_info_v1 {
-       u_int32_t mark_mask;
-       u_int32_t mark_value;
+       __u32 mark_mask;
+       __u32 mark_value;
        union nf_inet_addr laddr;
        __be16 lport;
 };
index 8866826..9b883c8 100644 (file)
@@ -1,15 +1,17 @@
 #ifndef _XT_CLUSTER_MATCH_H
 #define _XT_CLUSTER_MATCH_H
 
+#include <linux/types.h>
+
 enum xt_cluster_flags {
        XT_CLUSTER_F_INV        = (1 << 0)
 };
 
 struct xt_cluster_match_info {
-       u_int32_t               total_nodes;
-       u_int32_t               node_mask;
-       u_int32_t               hash_seed;
-       u_int32_t               flags;
+       __u32 total_nodes;
+       __u32 node_mask;
+       __u32 hash_seed;
+       __u32 flags;
 };
 
 #define XT_CLUSTER_NODES_MAX   32
index eacfedc..0ea5e79 100644 (file)
@@ -4,7 +4,7 @@
 #define XT_MAX_COMMENT_LEN 256
 
 struct xt_comment_info {
-       unsigned char comment[XT_MAX_COMMENT_LEN];
+       char comment[XT_MAX_COMMENT_LEN];
 };
 
 #endif /* XT_COMMENT_H */
index 7e3284b..0ca66e9 100644 (file)
@@ -1,8 +1,15 @@
 #ifndef _XT_CONNLIMIT_H
 #define _XT_CONNLIMIT_H
 
+#include <linux/types.h>
+
 struct xt_connlimit_data;
 
+enum {
+       XT_CONNLIMIT_INVERT = 1 << 0,
+       XT_CONNLIMIT_DADDR  = 1 << 1,
+};
+
 struct xt_connlimit_info {
        union {
                union nf_inet_addr mask;
@@ -13,7 +20,14 @@ struct xt_connlimit_info {
                };
 #endif
        };
-       unsigned int limit, inverse;
+       unsigned int limit;
+       union {
+               /* revision 0 */
+               unsigned int inverse;
+
+               /* revision 1 */
+               __u32 flags;
+       };
 
        /* Used internally by the kernel */
        struct xt_connlimit_data *data __attribute__((aligned(8)));
index 54f47a2..74b904d 100644 (file)
@@ -58,4 +58,19 @@ struct xt_conntrack_mtinfo2 {
        __u16 state_mask, status_mask;
 };
 
+struct xt_conntrack_mtinfo3 {
+       union nf_inet_addr origsrc_addr, origsrc_mask;
+       union nf_inet_addr origdst_addr, origdst_mask;
+       union nf_inet_addr replsrc_addr, replsrc_mask;
+       union nf_inet_addr repldst_addr, repldst_mask;
+       __u32 expires_min, expires_max;
+       __u16 l4proto;
+       __u16 origsrc_port, origdst_port;
+       __u16 replsrc_port, repldst_port;
+       __u16 match_flags, invert_flags;
+       __u16 state_mask, status_mask;
+       __u16 origsrc_port_high, origdst_port_high;
+       __u16 replsrc_port_high, repldst_port_high;
+};
+
 #endif /*_XT_CONNTRACK_H*/
index b0d28c6..ca6e03e 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _XT_QUOTA_H
 #define _XT_QUOTA_H
 
+#include <linux/types.h>
+
 enum xt_quota_flags {
        XT_QUOTA_INVERT         = 0x1,
 };
@@ -9,9 +11,9 @@ enum xt_quota_flags {
 struct xt_quota_priv;
 
 struct xt_quota_info {
-       u_int32_t               flags;
-       u_int32_t               pad;
-       aligned_u64             quota;
+       __u32 flags;
+       __u32 pad;
+       aligned_u64 quota;
 
        /* Used internally by the kernel */
        struct xt_quota_priv    *master;
index 6f475b8..26d7217 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _XT_SOCKET_H
 #define _XT_SOCKET_H
 
+#include <linux/types.h>
+
 enum {
        XT_SOCKET_TRANSPARENT = 1 << 0,
 };
index 14b6df4..7c37fac 100644 (file)
@@ -1,14 +1,16 @@
 #ifndef _XT_TIME_H
 #define _XT_TIME_H 1
 
+#include <linux/types.h>
+
 struct xt_time_info {
-       u_int32_t date_start;
-       u_int32_t date_stop;
-       u_int32_t daytime_start;
-       u_int32_t daytime_stop;
-       u_int32_t monthdays_match;
-       u_int8_t weekdays_match;
-       u_int8_t flags;
+       __u32 date_start;
+       __u32 date_stop;
+       __u32 daytime_start;
+       __u32 daytime_stop;
+       __u32 monthdays_match;
+       __u8 weekdays_match;
+       __u8 flags;
 };
 
 enum {
index 9947f56..04d1bfe 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _XT_U32_H
 #define _XT_U32_H 1
 
+#include <linux/types.h>
+
 enum xt_u32_ops {
        XT_U32_AND,
        XT_U32_LEFTSH,
@@ -9,13 +11,13 @@ enum xt_u32_ops {
 };
 
 struct xt_u32_location_element {
-       u_int32_t number;
-       u_int8_t nextop;
+       __u32 number;
+       __u8 nextop;
 };
 
 struct xt_u32_value_element {
-       u_int32_t min;
-       u_int32_t max;
+       __u32 min;
+       __u32 max;
 };
 
 /*
@@ -27,14 +29,14 @@ struct xt_u32_value_element {
 struct xt_u32_test {
        struct xt_u32_location_element location[XT_U32_MAXSIZE+1];
        struct xt_u32_value_element value[XT_U32_MAXSIZE+1];
-       u_int8_t nnums;
-       u_int8_t nvalues;
+       __u8 nnums;
+       __u8 nvalues;
 };
 
 struct xt_u32 {
        struct xt_u32_test tests[XT_U32_MAXSIZE+1];
-       u_int8_t ntests;
-       u_int8_t invert;
+       __u8 ntests;
+       __u8 invert;
 };
 
 #endif /* _XT_U32_H */
index c73ef0b..be5be15 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_802_3_H
 #define __LINUX_BRIDGE_EBT_802_3_H
 
+#include <linux/types.h>
+
 #define EBT_802_3_SAP 0x01
 #define EBT_802_3_TYPE 0x02
 
 
 /* ui has one byte ctrl, ni has two */
 struct hdr_ui {
-       uint8_t dsap;
-       uint8_t ssap;
-       uint8_t ctrl;
-       uint8_t orig[3];
+       __u8 dsap;
+       __u8 ssap;
+       __u8 ctrl;
+       __u8 orig[3];
        __be16 type;
 };
 
 struct hdr_ni {
-       uint8_t dsap;
-       uint8_t ssap;
+       __u8 dsap;
+       __u8 ssap;
        __be16 ctrl;
-       uint8_t  orig[3];
+       __u8  orig[3];
        __be16 type;
 };
 
 struct ebt_802_3_hdr {
-       uint8_t  daddr[6];
-       uint8_t  saddr[6];
+       __u8  daddr[6];
+       __u8  saddr[6];
        __be16 len;
        union {
                struct hdr_ui ui;
@@ -59,10 +61,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
 #endif
 
 struct ebt_802_3_info {
-       uint8_t  sap;
+       __u8  sap;
        __be16 type;
-       uint8_t  bitmask;
-       uint8_t  invflags;
+       __u8  bitmask;
+       __u8  invflags;
 };
 
 #endif
index 0009558..bd4e3ad 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_AMONG_H
 #define __LINUX_BRIDGE_EBT_AMONG_H
 
+#include <linux/types.h>
+
 #define EBT_AMONG_DST 0x01
 #define EBT_AMONG_SRC 0x02
 
@@ -30,7 +32,7 @@
  */
 
 struct ebt_mac_wormhash_tuple {
-       uint32_t cmp[2];
+       __u32 cmp[2];
        __be32 ip;
 };
 
index cbf4843..522f3e4 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_ARP_H
 #define __LINUX_BRIDGE_EBT_ARP_H
 
+#include <linux/types.h>
+
 #define EBT_ARP_OPCODE 0x01
 #define EBT_ARP_HTYPE 0x02
 #define EBT_ARP_PTYPE 0x04
@@ -27,8 +29,8 @@ struct ebt_arp_info
        unsigned char smmsk[ETH_ALEN];
        unsigned char dmaddr[ETH_ALEN];
        unsigned char dmmsk[ETH_ALEN];
-       uint8_t  bitmask;
-       uint8_t  invflags;
+       __u8  bitmask;
+       __u8  invflags;
 };
 
 #endif
index 6a708fb..c4bbc41 100644 (file)
@@ -15,6 +15,8 @@
 #ifndef __LINUX_BRIDGE_EBT_IP_H
 #define __LINUX_BRIDGE_EBT_IP_H
 
+#include <linux/types.h>
+
 #define EBT_IP_SOURCE 0x01
 #define EBT_IP_DEST 0x02
 #define EBT_IP_TOS 0x04
@@ -31,12 +33,12 @@ struct ebt_ip_info {
        __be32 daddr;
        __be32 smsk;
        __be32 dmsk;
-       uint8_t  tos;
-       uint8_t  protocol;
-       uint8_t  bitmask;
-       uint8_t  invflags;
-       uint16_t sport[2];
-       uint16_t dport[2];
+       __u8  tos;
+       __u8  protocol;
+       __u8  bitmask;
+       __u8  invflags;
+       __u16 sport[2];
+       __u16 dport[2];
 };
 
 #endif
index e5de987..42b8896 100644 (file)
 #ifndef __LINUX_BRIDGE_EBT_IP6_H
 #define __LINUX_BRIDGE_EBT_IP6_H
 
+#include <linux/types.h>
+
 #define EBT_IP6_SOURCE 0x01
 #define EBT_IP6_DEST 0x02
 #define EBT_IP6_TCLASS 0x04
 #define EBT_IP6_PROTO 0x08
 #define EBT_IP6_SPORT 0x10
 #define EBT_IP6_DPORT 0x20
+#define EBT_IP6_ICMP6 0x40
+
 #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\
-                     EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT)
+                     EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \
+                     EBT_IP6_ICMP6)
 #define EBT_IP6_MATCH "ip6"
 
 /* the same values are used for the invflags */
@@ -28,12 +33,18 @@ struct ebt_ip6_info {
        struct in6_addr daddr;
        struct in6_addr smsk;
        struct in6_addr dmsk;
-       uint8_t  tclass;
-       uint8_t  protocol;
-       uint8_t  bitmask;
-       uint8_t  invflags;
-       uint16_t sport[2];
-       uint16_t dport[2];
+       __u8  tclass;
+       __u8  protocol;
+       __u8  bitmask;
+       __u8  invflags;
+       union {
+               __u16 sport[2];
+               __u8 icmpv6_type[2];
+       };
+       union {
+               __u16 dport[2];
+               __u8 icmpv6_code[2];
+       };
 };
 
 #endif
index 4bf76b7..66d80b3 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_LIMIT_H
 #define __LINUX_BRIDGE_EBT_LIMIT_H
 
+#include <linux/types.h>
+
 #define EBT_LIMIT_MATCH "limit"
 
 /* timings are in milliseconds. */
    seconds, or one every 59 hours. */
 
 struct ebt_limit_info {
-       u_int32_t avg;    /* Average secs between packets * scale */
-       u_int32_t burst;  /* Period multiplier for upper limit. */
+       __u32 avg;    /* Average secs between packets * scale */
+       __u32 burst;  /* Period multiplier for upper limit. */
 
        /* Used internally by the kernel */
        unsigned long prev;
-       u_int32_t credit;
-       u_int32_t credit_cap, cost;
+       __u32 credit;
+       __u32 credit_cap, cost;
 };
 
 #endif
index cc2cdfb..7e7f1d1 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_LOG_H
 #define __LINUX_BRIDGE_EBT_LOG_H
 
+#include <linux/types.h>
+
 #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */
 #define EBT_LOG_ARP 0x02
 #define EBT_LOG_NFLOG 0x04
@@ -10,9 +12,9 @@
 #define EBT_LOG_WATCHER "log"
 
 struct ebt_log_info {
-       uint8_t loglevel;
-       uint8_t prefix[EBT_LOG_PREFIX_SIZE];
-       uint32_t bitmask;
+       __u8 loglevel;
+       __u8 prefix[EBT_LOG_PREFIX_SIZE];
+       __u32 bitmask;
 };
 
 #endif
index 9ceb10e..410f9e5 100644 (file)
@@ -1,13 +1,15 @@
 #ifndef __LINUX_BRIDGE_EBT_MARK_M_H
 #define __LINUX_BRIDGE_EBT_MARK_M_H
 
+#include <linux/types.h>
+
 #define EBT_MARK_AND 0x01
 #define EBT_MARK_OR 0x02
 #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
 struct ebt_mark_m_info {
        unsigned long mark, mask;
-       uint8_t invert;
-       uint8_t bitmask;
+       __u8 invert;
+       __u8 bitmask;
 };
 #define EBT_MARK_MATCH "mark_m"
 
index 0528178..df829fc 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_NFLOG_H
 #define __LINUX_BRIDGE_EBT_NFLOG_H
 
+#include <linux/types.h>
+
 #define EBT_NFLOG_MASK 0x0
 
 #define EBT_NFLOG_PREFIX_SIZE 64
 #define EBT_NFLOG_DEFAULT_THRESHOLD    1
 
 struct ebt_nflog_info {
-       u_int32_t len;
-       u_int16_t group;
-       u_int16_t threshold;
-       u_int16_t flags;
-       u_int16_t pad;
+       __u32 len;
+       __u16 group;
+       __u16 threshold;
+       __u16 flags;
+       __u16 pad;
        char prefix[EBT_NFLOG_PREFIX_SIZE];
 };
 
index 51a7998..c241bad 100644 (file)
@@ -1,9 +1,11 @@
 #ifndef __LINUX_BRIDGE_EBT_PKTTYPE_H
 #define __LINUX_BRIDGE_EBT_PKTTYPE_H
 
+#include <linux/types.h>
+
 struct ebt_pkttype_info {
-       uint8_t pkt_type;
-       uint8_t invert;
+       __u8 pkt_type;
+       __u8 invert;
 };
 #define EBT_PKTTYPE_MATCH "pkttype"
 
index e503a0a..1025b9f 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_STP_H
 #define __LINUX_BRIDGE_EBT_STP_H
 
+#include <linux/types.h>
+
 #define EBT_STP_TYPE           0x0001
 
 #define EBT_STP_FLAGS          0x0002
 #define EBT_STP_MATCH "stp"
 
 struct ebt_stp_config_info {
-       uint8_t flags;
-       uint16_t root_priol, root_priou;
+       __u8 flags;
+       __u16 root_priol, root_priou;
        char root_addr[6], root_addrmsk[6];
-       uint32_t root_costl, root_costu;
-       uint16_t sender_priol, sender_priou;
+       __u32 root_costl, root_costu;
+       __u16 sender_priol, sender_priou;
        char sender_addr[6], sender_addrmsk[6];
-       uint16_t portl, portu;
-       uint16_t msg_agel, msg_ageu;
-       uint16_t max_agel, max_ageu;
-       uint16_t hello_timel, hello_timeu;
-       uint16_t forward_delayl, forward_delayu;
+       __u16 portl, portu;
+       __u16 msg_agel, msg_ageu;
+       __u16 max_agel, max_ageu;
+       __u16 hello_timel, hello_timeu;
+       __u16 forward_delayl, forward_delayu;
 };
 
 struct ebt_stp_info {
-       uint8_t type;
+       __u8 type;
        struct ebt_stp_config_info config;
-       uint16_t bitmask;
-       uint16_t invflags;
+       __u16 bitmask;
+       __u16 invflags;
 };
 
 #endif
index b677e26..89a6bec 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _EBT_ULOG_H
 #define _EBT_ULOG_H
 
+#include <linux/types.h>
+
 #define EBT_ULOG_DEFAULT_NLGROUP 0
 #define EBT_ULOG_DEFAULT_QTHRESHOLD 1
 #define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */
@@ -10,7 +12,7 @@
 #define EBT_ULOG_VERSION 1
 
 struct ebt_ulog_info {
-       uint32_t nlgroup;
+       __u32 nlgroup;
        unsigned int cprange;
        unsigned int qthreshold;
        char prefix[EBT_ULOG_PREFIX_LEN];
index 1d98be4..967d1d5 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __LINUX_BRIDGE_EBT_VLAN_H
 #define __LINUX_BRIDGE_EBT_VLAN_H
 
+#include <linux/types.h>
+
 #define EBT_VLAN_ID    0x01
 #define EBT_VLAN_PRIO  0x02
 #define EBT_VLAN_ENCAP 0x04
 #define EBT_VLAN_MATCH "vlan"
 
 struct ebt_vlan_info {
-       uint16_t id;            /* VLAN ID {1-4095} */
-       uint8_t prio;           /* VLAN User Priority {0-7} */
+       __u16 id;               /* VLAN ID {1-4095} */
+       __u8 prio;              /* VLAN User Priority {0-7} */
        __be16 encap;           /* VLAN Encapsulated frame code {0-65535} */
-       uint8_t bitmask;                /* Args bitmask bit 1=1 - ID arg,
+       __u8 bitmask;           /* Args bitmask bit 1=1 - ID arg,
                                   bit 2=1 User-Priority arg, bit 3=1 encap*/
-       uint8_t invflags;               /* Inverse bitmask  bit 1=1 - inversed ID arg, 
+       __u8 invflags;          /* Inverse bitmask  bit 1=1 - inversed ID arg, 
                                   bit 2=1 - inversed Pirority arg */
 };
 
index e5a3687..c6a204c 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _IPT_CLUSTERIP_H_target
 #define _IPT_CLUSTERIP_H_target
 
+#include <linux/types.h>
+
 enum clusterip_hashmode {
     CLUSTERIP_HASHMODE_SIP = 0,
     CLUSTERIP_HASHMODE_SIP_SPT,
@@ -17,15 +19,15 @@ struct clusterip_config;
 
 struct ipt_clusterip_tgt_info {
 
-       u_int32_t flags;
+       __u32 flags;
 
        /* only relevant for new ones */
-       u_int8_t clustermac[6];
-       u_int16_t num_total_nodes;
-       u_int16_t num_local_nodes;
-       u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
-       u_int32_t hash_mode;
-       u_int32_t hash_initval;
+       __u8 clustermac[6];
+       __u16 num_total_nodes;
+       __u16 num_local_nodes;
+       __u16 local_nodes[CLUSTERIP_MAX_NODES];
+       __u32 hash_mode;
+       __u32 hash_initval;
 
        /* Used internally by the kernel */
        struct clusterip_config *config;
index 7ca4591..bb88d53 100644 (file)
@@ -8,6 +8,8 @@
 */
 #ifndef _IPT_ECN_TARGET_H
 #define _IPT_ECN_TARGET_H
+
+#include <linux/types.h>
 #include <linux/netfilter/xt_DSCP.h>
 
 #define IPT_ECN_IP_MASK        (~XT_DSCP_MASK)
 #define IPT_ECN_OP_MASK                0xce
 
 struct ipt_ECN_info {
-       u_int8_t operation;     /* bitset of operations */
-       u_int8_t ip_ect;        /* ECT codepoint of IPv4 header, pre-shifted */
+       __u8 operation; /* bitset of operations */
+       __u8 ip_ect;    /* ECT codepoint of IPv4 header, pre-shifted */
        union {
                struct {
-                       u_int8_t ece:1, cwr:1; /* TCP ECT bits */
+                       __u8 ece:1, cwr:1; /* TCP ECT bits */
                } tcp;
        } proto;
 };
index 2529660..5bca782 100644 (file)
@@ -1,15 +1,17 @@
 #ifndef _IPT_SAME_H
 #define _IPT_SAME_H
 
+#include <linux/types.h>
+
 #define IPT_SAME_MAX_RANGE     10
 
 #define IPT_SAME_NODST         0x01
 
 struct ipt_same_info {
        unsigned char info;
-       u_int32_t rangesize;
-       u_int32_t ipnum;
-       u_int32_t *iparray;
+       __u32 rangesize;
+       __u32 ipnum;
+       __u32 *iparray;
 
        /* hangs off end. */
        struct nf_nat_range range[IPT_SAME_MAX_RANGE];
index ee6611e..f6ac169 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _IPT_TTL_H
 #define _IPT_TTL_H
 
+#include <linux/types.h>
+
 enum {
        IPT_TTL_SET = 0,
        IPT_TTL_INC,
@@ -13,8 +15,8 @@ enum {
 #define IPT_TTL_MAXMODE        IPT_TTL_DEC
 
 struct ipt_TTL_info {
-       u_int8_t        mode;
-       u_int8_t        ttl;
+       __u8    mode;
+       __u8    ttl;
 };
 
 
index 446de6a..0da4223 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _IPT_ADDRTYPE_H
 #define _IPT_ADDRTYPE_H
 
+#include <linux/types.h>
+
 enum {
        IPT_ADDRTYPE_INVERT_SOURCE      = 0x0001,
        IPT_ADDRTYPE_INVERT_DEST        = 0x0002,
@@ -9,17 +11,17 @@ enum {
 };
 
 struct ipt_addrtype_info_v1 {
-       u_int16_t       source;         /* source-type mask */
-       u_int16_t       dest;           /* dest-type mask */
-       u_int32_t       flags;
+       __u16   source;         /* source-type mask */
+       __u16   dest;           /* dest-type mask */
+       __u32   flags;
 };
 
 /* revision 0 */
 struct ipt_addrtype_info {
-       u_int16_t       source;         /* source-type mask */
-       u_int16_t       dest;           /* dest-type mask */
-       u_int32_t       invert_source;
-       u_int32_t       invert_dest;
+       __u16   source;         /* source-type mask */
+       __u16   dest;           /* dest-type mask */
+       __u32   invert_source;
+       __u32   invert_dest;
 };
 
 #endif
index 2e555b4..4e02bb0 100644 (file)
@@ -1,9 +1,11 @@
 #ifndef _IPT_AH_H
 #define _IPT_AH_H
 
+#include <linux/types.h>
+
 struct ipt_ah {
-       u_int32_t spis[2];                      /* Security Parameter Index */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 spis[2];                  /* Security Parameter Index */
+       __u8  invflags;                 /* Inverse flags */
 };
 
 
index 9945baa..eabf95f 100644 (file)
@@ -8,6 +8,8 @@
 */
 #ifndef _IPT_ECN_H
 #define _IPT_ECN_H
+
+#include <linux/types.h>
 #include <linux/netfilter/xt_dscp.h>
 
 #define IPT_ECN_IP_MASK        (~XT_DSCP_MASK)
 
 /* match info */
 struct ipt_ecn_info {
-       u_int8_t operation;
-       u_int8_t invert;
-       u_int8_t ip_ect;
+       __u8 operation;
+       __u8 invert;
+       __u8 ip_ect;
        union {
                struct {
-                       u_int8_t ect;
+                       __u8 ect;
                } tcp;
        } proto;
 };
index ee24fd8..37bee44 100644 (file)
@@ -4,6 +4,8 @@
 #ifndef _IPT_TTL_H
 #define _IPT_TTL_H
 
+#include <linux/types.h>
+
 enum {
        IPT_TTL_EQ = 0,         /* equals */
        IPT_TTL_NE,             /* not equals */
@@ -13,8 +15,8 @@ enum {
 
 
 struct ipt_ttl_info {
-       u_int8_t        mode;
-       u_int8_t        ttl;
+       __u8    mode;
+       __u8    ttl;
 };
 
 
index afb7813..ebd8ead 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef _IP6T_HL_H
 #define _IP6T_HL_H
 
+#include <linux/types.h>
+
 enum {
        IP6T_HL_SET = 0,
        IP6T_HL_INC,
@@ -14,8 +16,8 @@ enum {
 #define IP6T_HL_MAXMODE        IP6T_HL_DEC
 
 struct ip6t_HL_info {
-       u_int8_t        mode;
-       u_int8_t        hop_limit;
+       __u8    mode;
+       __u8    hop_limit;
 };
 
 
index 6be6504..205ed62 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _IP6T_REJECT_H
 #define _IP6T_REJECT_H
 
+#include <linux/types.h>
+
 enum ip6t_reject_with {
        IP6T_ICMP6_NO_ROUTE,
        IP6T_ICMP6_ADM_PROHIBITED,
@@ -12,7 +14,7 @@ enum ip6t_reject_with {
 };
 
 struct ip6t_reject_info {
-       u_int32_t       with;   /* reject type */
+       __u32   with;   /* reject type */
 };
 
 #endif /*_IP6T_REJECT_H*/
index 17a745c..5da2b65 100644 (file)
@@ -1,11 +1,13 @@
 #ifndef _IP6T_AH_H
 #define _IP6T_AH_H
 
+#include <linux/types.h>
+
 struct ip6t_ah {
-       u_int32_t spis[2];                      /* Security Parameter Index */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  hdrres;                       /* Test of the Reserved Filed */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 spis[2];                  /* Security Parameter Index */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  hdrres;                   /* Test of the Reserved Filed */
+       __u8  invflags;                 /* Inverse flags */
 };
 
 #define IP6T_AH_SPI 0x01
index 3724d08..b47f61b 100644 (file)
@@ -1,11 +1,13 @@
 #ifndef _IP6T_FRAG_H
 #define _IP6T_FRAG_H
 
+#include <linux/types.h>
+
 struct ip6t_frag {
-       u_int32_t ids[2];                       /* Security Parameter Index */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  flags;                        /*  */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 ids[2];                   /* Security Parameter Index */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  flags;                    /*  */
+       __u8  invflags;                 /* Inverse flags */
 };
 
 #define IP6T_FRAG_IDS          0x01
index 5ef91b8..6e76dbc 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef _IP6T_HL_H
 #define _IP6T_HL_H
 
+#include <linux/types.h>
+
 enum {
        IP6T_HL_EQ = 0,         /* equals */
        IP6T_HL_NE,             /* not equals */
@@ -14,8 +16,8 @@ enum {
 
 
 struct ip6t_hl_info {
-       u_int8_t        mode;
-       u_int8_t        hop_limit;
+       __u8    mode;
+       __u8    hop_limit;
 };
 
 
index 01dfd44..efae3a2 100644 (file)
@@ -8,10 +8,12 @@ on whether they contain certain headers */
 #ifndef __IPV6HEADER_H
 #define __IPV6HEADER_H
 
+#include <linux/types.h>
+
 struct ip6t_ipv6header_info {
-       u_int8_t matchflags;
-       u_int8_t invflags;
-       u_int8_t modeflag;
+       __u8 matchflags;
+       __u8 invflags;
+       __u8 modeflag;
 };
 
 #define MASK_HOPOPTS    128
index 18549bc..a7729a5 100644 (file)
@@ -1,10 +1,12 @@
 #ifndef _IP6T_MH_H
 #define _IP6T_MH_H
 
+#include <linux/types.h>
+
 /* MH matching stuff */
 struct ip6t_mh {
-       u_int8_t types[2];      /* MH type range */
-       u_int8_t invflags;      /* Inverse flags */
+       __u8 types[2];  /* MH type range */
+       __u8 invflags;  /* Inverse flags */
 };
 
 /* Values for "invflags" field in struct ip6t_mh. */
index 62d89bc..17d419a 100644 (file)
@@ -1,14 +1,16 @@
 #ifndef _IP6T_OPTS_H
 #define _IP6T_OPTS_H
 
+#include <linux/types.h>
+
 #define IP6T_OPTS_OPTSNR 16
 
 struct ip6t_opts {
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t flags;                         /*  */
-       u_int8_t invflags;                      /* Inverse flags */
-       u_int16_t opts[IP6T_OPTS_OPTSNR];       /* opts */
-       u_int8_t optsnr;                        /* Nr of OPts */
+       __u32 hdrlen;                   /* Header Length */
+       __u8 flags;                             /*  */
+       __u8 invflags;                  /* Inverse flags */
+       __u16 opts[IP6T_OPTS_OPTSNR];   /* opts */
+       __u8 optsnr;                    /* Nr of OPts */
 };
 
 #define IP6T_OPTS_LEN          0x01
index ab91bfd..7605a5f 100644 (file)
@@ -1,18 +1,19 @@
 #ifndef _IP6T_RT_H
 #define _IP6T_RT_H
 
+#include <linux/types.h>
 /*#include <linux/in6.h>*/
 
 #define IP6T_RT_HOPS 16
 
 struct ip6t_rt {
-       u_int32_t rt_type;                      /* Routing Type */
-       u_int32_t segsleft[2];                  /* Segments Left */
-       u_int32_t hdrlen;                       /* Header Length */
-       u_int8_t  flags;                        /*  */
-       u_int8_t  invflags;                     /* Inverse flags */
+       __u32 rt_type;                  /* Routing Type */
+       __u32 segsleft[2];                      /* Segments Left */
+       __u32 hdrlen;                   /* Header Length */
+       __u8  flags;                    /*  */
+       __u8  invflags;                 /* Inverse flags */
        struct in6_addr addrs[IP6T_RT_HOPS];    /* Hops */
-       u_int8_t addrnr;                        /* Nr of Addresses */
+       __u8 addrnr;                    /* Nr of Addresses */
 };
 
 #define IP6T_RT_TYP            0x01
index 2cfa4bc..776cd93 100644 (file)
@@ -481,4 +481,16 @@ struct tc_drr_stats {
        __u32   deficit;
 };
 
+/* MQPRIO */
+#define TC_QOPT_BITMASK 15
+#define TC_QOPT_MAX_QUEUE 16
+
+struct tc_mqprio_qopt {
+       __u8    num_tc;
+       __u8    prio_tc_map[TC_QOPT_BITMASK + 1];
+       __u8    hw;
+       __u16   count[TC_QOPT_MAX_QUEUE];
+       __u16   offset[TC_QOPT_MAX_QUEUE];
+};
+
 #endif
index bf221d6..6e946da 100644 (file)
@@ -1801,6 +1801,15 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
                     prefetch(skb->prev), (skb != (struct sk_buff *)(queue));   \
                     skb = skb->prev)
 
+#define skb_queue_reverse_walk_safe(queue, skb, tmp)                           \
+               for (skb = (queue)->prev, tmp = skb->prev;                      \
+                    skb != (struct sk_buff *)(queue);                          \
+                    skb = tmp, tmp = skb->prev)
+
+#define skb_queue_reverse_walk_from_safe(queue, skb, tmp)                      \
+               for (tmp = skb->prev;                                           \
+                    skb != (struct sk_buff *)(queue);                          \
+                    skb = tmp, tmp = skb->prev)
 
 static inline bool skb_has_frag_list(const struct sk_buff *skb)
 {
index 93b0310..be5a0d4 100644 (file)
@@ -72,7 +72,7 @@ struct dst_entry {
 
        u32                     _metrics[RTAX_MAX];
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        __u32                   tclassid;
 #else
        __u32                   __pad2;
index 07bdb5e..65d1fcd 100644 (file)
@@ -55,7 +55,7 @@ struct fib_nh {
        int                     nh_weight;
        int                     nh_power;
 #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        __u32                   nh_tclassid;
 #endif
        int                     nh_oif;
@@ -201,7 +201,7 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp,
 extern int __net_init fib4_rules_init(struct net *net);
 extern void __net_exit fib4_rules_exit(struct net *net);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 extern u32 fib_rules_tclass(struct fib_result *res);
 #endif
 
@@ -235,7 +235,7 @@ extern struct fib_table *fib_hash_table(u32 id);
 
 static inline void fib_combine_itag(u32 *itag, struct fib_result *res)
 {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        u32 rtag;
 #endif
index b7bbd6c..b23bea6 100644 (file)
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #endif
+#include <net/net_namespace.h>         /* Netw namespace */
+
+/*
+ * Generic access of ipvs struct
+ */
+static inline struct netns_ipvs *net_ipvs(struct net* net)
+{
+       return net->ipvs;
+}
+/*
+ * Get net ptr from skb in traffic cases
+ * use skb_sknet when call is from userland (ioctl or netlink)
+ */
+static inline struct net *skb_net(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+       /*
+        * This is used for debug only.
+        * Start with the most likely hit
+        * End with BUG
+        */
+       if (likely(skb->dev && skb->dev->nd_net))
+               return dev_net(skb->dev);
+       if (skb_dst(skb)->dev)
+               return dev_net(skb_dst(skb)->dev);
+       WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
+                     __func__, __LINE__);
+       if (likely(skb->sk && skb->sk->sk_net))
+               return sock_net(skb->sk);
+       pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+               __func__, __LINE__);
+       BUG();
+#else
+       return dev_net(skb->dev ? : skb_dst(skb)->dev);
+#endif
+#else
+       return &init_net;
+#endif
+}
+
+static inline struct net *skb_sknet(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+       /* Start with the most likely hit */
+       if (likely(skb->sk && skb->sk->sk_net))
+               return sock_net(skb->sk);
+       WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
+                      __func__, __LINE__);
+       if (likely(skb->dev && skb->dev->nd_net))
+               return dev_net(skb->dev);
+       pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+               __func__, __LINE__);
+       BUG();
+#else
+       return sock_net(skb->sk);
+#endif
+#else
+       return &init_net;
+#endif
+}
+/*
+ * This one needed for single_open_net since net is stored directly in
+ * private not as a struct i.e. seq_file_net cant be used.
+ */
+static inline struct net *seq_file_single_net(struct seq_file *seq)
+{
+#ifdef CONFIG_NET_NS
+       return (struct net *)seq->private;
+#else
+       return &init_net;
+#endif
+}
 
 /* Connections' size value needed by ip_vs_ctl.c */
 extern int ip_vs_conn_tab_size;
@@ -258,6 +332,23 @@ struct ip_vs_seq {
                                                   before last resized pkt */
 };
 
+/*
+ * counters per cpu
+ */
+struct ip_vs_counters {
+       __u32           conns;          /* connections scheduled */
+       __u32           inpkts;         /* incoming packets */
+       __u32           outpkts;        /* outgoing packets */
+       __u64           inbytes;        /* incoming bytes */
+       __u64           outbytes;       /* outgoing bytes */
+};
+/*
+ * Stats per cpu
+ */
+struct ip_vs_cpu_stats {
+       struct ip_vs_counters   ustats;
+       struct u64_stats_sync   syncp;
+};
 
 /*
  *     IPVS statistics objects
@@ -279,17 +370,34 @@ struct ip_vs_estimator {
 };
 
 struct ip_vs_stats {
-       struct ip_vs_stats_user ustats;         /* statistics */
+       struct ip_vs_stats_user ustats;         /* statistics */
        struct ip_vs_estimator  est;            /* estimator */
-
-       spinlock_t              lock;           /* spin lock */
+       struct ip_vs_cpu_stats  *cpustats;      /* per cpu counters */
+       spinlock_t              lock;           /* spin lock */
 };
 
+/*
+ * Helper Macros for per cpu
+ * ipvs->tot_stats->ustats.count
+ */
+#define IPVS_STAT_INC(ipvs, count)     \
+       __this_cpu_inc((ipvs)->ustats->count)
+
+#define IPVS_STAT_ADD(ipvs, count, value) \
+       do {\
+               write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
+                                    raw_smp_processor_id())); \
+               __this_cpu_add((ipvs)->ustats->count, value); \
+               write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
+                                  raw_smp_processor_id())); \
+       } while (0)
+
 struct dst_entry;
 struct iphdr;
 struct ip_vs_conn;
 struct ip_vs_app;
 struct sk_buff;
+struct ip_vs_proto_data;
 
 struct ip_vs_protocol {
        struct ip_vs_protocol   *next;
@@ -297,21 +405,22 @@ struct ip_vs_protocol {
        u16                     protocol;
        u16                     num_states;
        int                     dont_defrag;
-       atomic_t                appcnt;         /* counter of proto app incs */
-       int                     *timeout_table; /* protocol timeout table */
 
        void (*init)(struct ip_vs_protocol *pp);
 
        void (*exit)(struct ip_vs_protocol *pp);
 
+       void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
+       void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
+
        int (*conn_schedule)(int af, struct sk_buff *skb,
-                            struct ip_vs_protocol *pp,
+                            struct ip_vs_proto_data *pd,
                             int *verdict, struct ip_vs_conn **cpp);
 
        struct ip_vs_conn *
        (*conn_in_get)(int af,
                       const struct sk_buff *skb,
-                      struct ip_vs_protocol *pp,
                       const struct ip_vs_iphdr *iph,
                       unsigned int proto_off,
                       int inverse);
@@ -319,7 +428,6 @@ struct ip_vs_protocol {
        struct ip_vs_conn *
        (*conn_out_get)(int af,
                        const struct sk_buff *skb,
-                       struct ip_vs_protocol *pp,
                        const struct ip_vs_iphdr *iph,
                        unsigned int proto_off,
                        int inverse);
@@ -337,11 +445,11 @@ struct ip_vs_protocol {
 
        int (*state_transition)(struct ip_vs_conn *cp, int direction,
                                const struct sk_buff *skb,
-                               struct ip_vs_protocol *pp);
+                               struct ip_vs_proto_data *pd);
 
-       int (*register_app)(struct ip_vs_app *inc);
+       int (*register_app)(struct net *net, struct ip_vs_app *inc);
 
-       void (*unregister_app)(struct ip_vs_app *inc);
+       void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
 
        int (*app_conn_bind)(struct ip_vs_conn *cp);
 
@@ -350,14 +458,26 @@ struct ip_vs_protocol {
                             int offset,
                             const char *msg);
 
-       void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
+       void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
+};
 
-       int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+/*
+ * protocol data per netns
+ */
+struct ip_vs_proto_data {
+       struct ip_vs_proto_data *next;
+       struct ip_vs_protocol   *pp;
+       int                     *timeout_table; /* protocol timeout table */
+       atomic_t                appcnt;         /* counter of proto app incs. */
+       struct tcp_states_t     *tcp_state_table;
 };
 
-extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_protocol   *ip_vs_proto_get(unsigned short proto);
+extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
+                                                    unsigned short proto);
 
 struct ip_vs_conn_param {
+       struct net                      *net;
        const union nf_inet_addr        *caddr;
        const union nf_inet_addr        *vaddr;
        __be16                          cport;
@@ -375,16 +495,19 @@ struct ip_vs_conn_param {
  */
 struct ip_vs_conn {
        struct list_head        c_list;         /* hashed list heads */
-
+#ifdef CONFIG_NET_NS
+       struct net              *net;           /* Name space */
+#endif
        /* Protocol, addresses and port numbers */
-       u16                      af;            /* address family */
-       union nf_inet_addr       caddr;          /* client address */
-       union nf_inet_addr       vaddr;          /* virtual address */
-       union nf_inet_addr       daddr;          /* destination address */
-       volatile __u32           flags;          /* status flags */
-       __be16                   cport;
-       __be16                   vport;
-       __be16                   dport;
+       u16                     af;             /* address family */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __u32                   fwmark;         /* Fire wall mark from skb */
+       union nf_inet_addr      caddr;          /* client address */
+       union nf_inet_addr      vaddr;          /* virtual address */
+       union nf_inet_addr      daddr;          /* destination address */
+       volatile __u32          flags;          /* status flags */
        __u16                   protocol;       /* Which protocol (TCP/UDP) */
 
        /* counter and timer */
@@ -422,10 +545,38 @@ struct ip_vs_conn {
        struct ip_vs_seq        in_seq;         /* incoming seq. struct */
        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 
+       const struct ip_vs_pe   *pe;
        char                    *pe_data;
        __u8                    pe_data_len;
 };
 
+/*
+ *  To save some memory in conn table when name space is disabled.
+ */
+static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
+{
+#ifdef CONFIG_NET_NS
+       return cp->net;
+#else
+       return &init_net;
+#endif
+}
+static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+       cp->net = net;
+#endif
+}
+
+static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
+                                   struct net *net)
+{
+#ifdef CONFIG_NET_NS
+       return cp->net == net;
+#else
+       return 1;
+#endif
+}
 
 /*
  *     Extended internal versions of struct ip_vs_service_user and
@@ -485,6 +636,7 @@ struct ip_vs_service {
        unsigned                flags;    /* service status flags */
        unsigned                timeout;  /* persistent timeout in ticks */
        __be32                  netmask;  /* grouping granularity */
+       struct net              *net;
 
        struct list_head        destinations;  /* real server d-linked list */
        __u32                   num_dests;     /* number of servers */
@@ -510,8 +662,8 @@ struct ip_vs_dest {
        struct list_head        d_list;   /* for table with all the dests */
 
        u16                     af;             /* address family */
-       union nf_inet_addr      addr;           /* IP address of the server */
        __be16                  port;           /* port number of the server */
+       union nf_inet_addr      addr;           /* IP address of the server */
        volatile unsigned       flags;          /* dest status flags */
        atomic_t                conn_flags;     /* flags to copy to conn */
        atomic_t                weight;         /* server weight */
@@ -538,8 +690,8 @@ struct ip_vs_dest {
        /* for virtual service */
        struct ip_vs_service    *svc;           /* service it belongs to */
        __u16                   protocol;       /* which protocol (TCP/UDP) */
-       union nf_inet_addr      vaddr;          /* virtual IP address */
        __be16                  vport;          /* virtual port number */
+       union nf_inet_addr      vaddr;          /* virtual IP address */
        __u32                   vfwmark;        /* firewall mark of service */
 };
 
@@ -674,13 +826,14 @@ enum {
        IP_VS_DIR_LAST,
 };
 
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
                                         const union nf_inet_addr *caddr,
                                         __be16 cport,
                                         const union nf_inet_addr *vaddr,
                                         __be16 vport,
                                         struct ip_vs_conn_param *p)
 {
+       p->net = net;
        p->af = af;
        p->protocol = protocol;
        p->caddr = caddr;
@@ -695,7 +848,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
 struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
 
 struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-                                           struct ip_vs_protocol *pp,
                                            const struct ip_vs_iphdr *iph,
                                            unsigned int proto_off,
                                            int inverse);
@@ -703,7 +855,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
 struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
 
 struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-                                            struct ip_vs_protocol *pp,
                                             const struct ip_vs_iphdr *iph,
                                             unsigned int proto_off,
                                             int inverse);
@@ -719,14 +870,14 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
 struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
                                  const union nf_inet_addr *daddr,
                                  __be16 dport, unsigned flags,
-                                 struct ip_vs_dest *dest);
+                                 struct ip_vs_dest *dest, __u32 fwmark);
 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
 
 extern const char * ip_vs_state_name(__u16 proto, int state);
 
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
 extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_random_dropentry(struct net *net);
 extern int ip_vs_conn_init(void);
 extern void ip_vs_conn_cleanup(void);
 
@@ -796,12 +947,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
  *      (from ip_vs_app.c)
  */
 #define IP_VS_APP_MAX_PORTS  8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
 extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
 extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
+                                 __u16 proto, __u16 port);
 extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
 extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
 
@@ -814,15 +965,27 @@ void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
 void ip_vs_unbind_pe(struct ip_vs_service *svc);
 int register_ip_vs_pe(struct ip_vs_pe *pe);
 int unregister_ip_vs_pe(struct ip_vs_pe *pe);
-extern struct ip_vs_pe *ip_vs_pe_get(const char *name);
-extern void ip_vs_pe_put(struct ip_vs_pe *pe);
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
+
+static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               __module_get(pe->module);
+}
+
+static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
+{
+       if (pe && pe->module)
+               module_put(pe->module);
+}
 
 /*
  *     IPVS protocol functions (from ip_vs_proto.c)
  */
 extern int ip_vs_protocol_init(void);
 extern void ip_vs_protocol_cleanup(void);
-extern void ip_vs_protocol_timeout_change(int flags);
+extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
 extern int *ip_vs_create_timeout_table(int *table, int size);
 extern int
 ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -852,26 +1015,21 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
 extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
 extern struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-              struct ip_vs_protocol *pp, int *ignored);
+              struct ip_vs_proto_data *pd, int *ignored);
 extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-                       struct ip_vs_protocol *pp);
+                       struct ip_vs_proto_data *pd);
 
 
 /*
  *      IPVS control data and functions (from ip_vs_ctl.c)
  */
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
 extern struct ip_vs_stats ip_vs_stats;
 extern const struct ctl_path net_vs_ctl_path[];
+extern int sysctl_ip_vs_sync_ver;
 
+extern void ip_vs_sync_switch_mode(struct net *net, int mode);
 extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                  const union nf_inet_addr *vaddr, __be16 vport);
 
 static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -880,7 +1038,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
 }
 
 extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                          const union nf_inet_addr *daddr, __be16 dport);
 
 extern int ip_vs_use_count_inc(void);
@@ -888,8 +1046,9 @@ extern void ip_vs_use_count_dec(void);
 extern int ip_vs_control_init(void);
 extern void ip_vs_control_cleanup(void);
 extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
-               const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
+ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
+               __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+               __u16 protocol, __u32 fwmark);
 extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
 
 
@@ -897,14 +1056,12 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
  *      IPVS sync daemon data and function prototypes
  *      (from ip_vs_sync.c)
  */
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+                            __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
+extern int ip_vs_sync_init(void);
+extern void ip_vs_sync_cleanup(void);
 
 
 /*
@@ -912,8 +1069,8 @@ extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
  */
 extern int ip_vs_estimator_init(void);
 extern void ip_vs_estimator_cleanup(void);
-extern void ip_vs_new_estimator(struct ip_vs_stats *stats);
-extern void ip_vs_kill_estimator(struct ip_vs_stats *stats);
+extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
+extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
 
 /*
@@ -955,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
 extern int ip_vs_drop_rate;
 extern int ip_vs_drop_counter;
 
-static __inline__ int ip_vs_todrop(void)
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
 {
-       if (!ip_vs_drop_rate) return 0;
-       if (--ip_vs_drop_counter > 0) return 0;
-       ip_vs_drop_counter = ip_vs_drop_rate;
+       if (!ipvs->drop_rate)
+               return 0;
+       if (--ipvs->drop_counter > 0)
+               return 0;
+       ipvs->drop_counter = ipvs->drop_rate;
        return 1;
 }
 
@@ -1047,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
  *      Netfilter connection tracking
  *      (from ip_vs_nfct.c)
  */
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 {
-       return sysctl_ip_vs_conntrack;
+       return ipvs->sysctl_conntrack;
 }
 
 extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1062,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
 
 #else
 
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
 {
        return 0;
 }
index 1bf812b..b3b4a34 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/netns/conntrack.h>
 #endif
 #include <net/netns/xfrm.h>
+#include <net/netns/ip_vs.h>
 
 struct proc_dir_entry;
 struct net_device;
@@ -94,6 +95,7 @@ struct net {
 #ifdef CONFIG_XFRM
        struct netns_xfrm       xfrm;
 #endif
+       struct netns_ipvs       *ipvs;
 };
 
 
index d85cff1..d0d1337 100644 (file)
@@ -50,11 +50,24 @@ union nf_conntrack_expect_proto {
 /* per conntrack: application helper private data */
 union nf_conntrack_help {
        /* insert conntrack helper private data (master) here */
+#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE)
        struct nf_ct_ftp_master ct_ftp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_PPTP) || \
+    defined(CONFIG_NF_CONNTRACK_PPTP_MODULE)
        struct nf_ct_pptp_master ct_pptp_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_H323) || \
+    defined(CONFIG_NF_CONNTRACK_H323_MODULE)
        struct nf_ct_h323_master ct_h323_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SANE) || \
+    defined(CONFIG_NF_CONNTRACK_SANE_MODULE)
        struct nf_ct_sane_master ct_sane_info;
+#endif
+#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE)
        struct nf_ct_sip_master ct_sip_info;
+#endif
 };
 
 #include <linux/types.h>
@@ -116,14 +129,14 @@ struct nf_conn {
        u_int32_t secmark;
 #endif
 
-       /* Storage reserved for other modules: */
-       union nf_conntrack_proto proto;
-
        /* Extensions */
        struct nf_ct_ext *ext;
 #ifdef CONFIG_NET_NS
        struct net *ct_net;
 #endif
+
+       /* Storage reserved for other modules, must be the last member */
+       union nf_conntrack_proto proto;
 };
 
 static inline struct nf_conn *
@@ -189,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
  * Allocate a hashtable of hlist_head (if nulls == 0),
  * or hlist_nulls_head (if nulls == 1)
  */
-extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
 
-extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
+extern void nf_ct_free_hashtable(void *hash, unsigned int size);
 
 extern struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(struct net *net, u16 zone,
index 96ba5f7..8fdb04b 100644 (file)
@@ -23,12 +23,17 @@ struct nf_conntrack_ecache {
 static inline struct nf_conntrack_ecache *
 nf_ct_ecache_find(const struct nf_conn *ct)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
+#else
+       return NULL;
+#endif
 }
 
 static inline struct nf_conntrack_ecache *
 nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        struct net *net = nf_ct_net(ct);
        struct nf_conntrack_ecache *e;
 
@@ -45,6 +50,9 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
                e->expmask = expmask;
        }
        return e;
+#else
+       return NULL;
+#endif
 };
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
@@ -59,7 +67,7 @@ struct nf_ct_event_notifier {
        int (*fcn)(unsigned int events, struct nf_ct_event *item);
 };
 
-extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
+extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
 extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
 extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
 
@@ -159,7 +167,7 @@ struct nf_exp_event_notifier {
        int (*fcn)(unsigned int events, struct nf_exp_event *item);
 };
 
-extern struct nf_exp_event_notifier *nf_expect_event_cb;
+extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
 extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb);
 extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb);
 
index 0772d29..2dcf317 100644 (file)
@@ -7,10 +7,19 @@
 
 enum nf_ct_ext_id {
        NF_CT_EXT_HELPER,
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
        NF_CT_EXT_NAT,
+#endif
        NF_CT_EXT_ACCT,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
        NF_CT_EXT_ECACHE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
        NF_CT_EXT_ZONE,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       NF_CT_EXT_TSTAMP,
+#endif
        NF_CT_EXT_NUM,
 };
 
@@ -19,6 +28,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
 #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
index 32c305d..f1c1311 100644 (file)
@@ -63,4 +63,10 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
 extern int nf_conntrack_helper_init(void);
 extern void nf_conntrack_helper_fini(void);
 
+extern int nf_conntrack_broadcast_help(struct sk_buff *skb,
+                                      unsigned int protoff,
+                                      struct nf_conn *ct,
+                                      enum ip_conntrack_info ctinfo,
+                                      unsigned int timeout);
+
 #endif /*_NF_CONNTRACK_HELPER_H*/
index a754761..e8010f4 100644 (file)
@@ -73,7 +73,7 @@ struct nf_conntrack_l3proto {
        struct module *me;
 };
 
-extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX];
+extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX];
 
 /* Protocol registration. */
 extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
new file mode 100644 (file)
index 0000000..fc9c82b
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef _NF_CONNTRACK_TSTAMP_H
+#define _NF_CONNTRACK_TSTAMP_H
+
+#include <net/net_namespace.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+
+struct nf_conn_tstamp {
+       u_int64_t start;
+       u_int64_t stop;
+};
+
+static inline
+struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP);
+#else
+       return NULL;
+#endif
+}
+
+static inline
+struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       struct net *net = nf_ct_net(ct);
+
+       if (!net->ct.sysctl_tstamp)
+               return NULL;
+
+       return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp);
+#else
+       return NULL;
+#endif
+};
+
+static inline bool nf_ct_tstamp_enabled(struct net *net)
+{
+       return net->ct.sysctl_tstamp != 0;
+}
+
+static inline void nf_ct_set_tstamp(struct net *net, bool enable)
+{
+       net->ct.sysctl_tstamp = enable;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+extern int nf_conntrack_tstamp_init(struct net *net);
+extern void nf_conntrack_tstamp_fini(struct net *net);
+#else
+static inline int nf_conntrack_tstamp_init(struct net *net)
+{
+       return 0;
+}
+
+static inline void nf_conntrack_tstamp_fini(struct net *net)
+{
+       return;
+}
+#endif /* CONFIG_NF_CONNTRACK_TIMESTAMP */
+
+#endif /* _NF_CONNTRACK_TSTAMP_H */
index f5f09f0..aff80b1 100644 (file)
@@ -56,7 +56,9 @@ struct nf_nat_multi_range_compat {
 /* per conntrack: nat application helper private data */
 union nf_conntrack_nat_help {
        /* insert nat helper private data here */
+#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
        struct nf_nat_pptp nat_pptp_info;
+#endif
 };
 
 struct nf_conn;
@@ -84,7 +86,11 @@ extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 
 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 {
+#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
        return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
+#else
+       return NULL;
+#endif
 }
 
 #else  /* !__KERNEL__: iptables wants this to compile. */
index 33602ab..3dc7b98 100644 (file)
@@ -21,9 +21,9 @@ static inline int nf_nat_initialized(struct nf_conn *ct,
                                     enum nf_nat_manip_type manip)
 {
        if (manip == IP_NAT_MANIP_SRC)
-               return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_SRC_NAT_DONE;
        else
-               return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               return ct->status & IPS_DST_NAT_DONE;
 }
 
 struct nlattr;
index d4958d4..341eb08 100644 (file)
@@ -21,15 +21,15 @@ struct netns_ct {
        int                     sysctl_events;
        unsigned int            sysctl_events_retry_timeout;
        int                     sysctl_acct;
+       int                     sysctl_tstamp;
        int                     sysctl_checksum;
        unsigned int            sysctl_log_invalid; /* Log invalid packets */
 #ifdef CONFIG_SYSCTL
        struct ctl_table_header *sysctl_header;
        struct ctl_table_header *acct_sysctl_header;
+       struct ctl_table_header *tstamp_sysctl_header;
        struct ctl_table_header *event_sysctl_header;
 #endif
-       int                     hash_vmalloc;
-       int                     expect_vmalloc;
        char                    *slabname;
 };
 #endif
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
new file mode 100644 (file)
index 0000000..259ebac
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ *  IP Virtual Server
+ *  Data structure for network namspace
+ *
+ */
+
+#ifndef IP_VS_H_
+#define IP_VS_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/list_nulls.h>
+#include <linux/ip_vs.h>
+#include <asm/atomic.h>
+#include <linux/in.h>
+
+struct ip_vs_stats;
+struct ip_vs_sync_buff;
+struct ctl_table_header;
+
+struct netns_ipvs {
+       int                     gen;            /* Generation */
+       /*
+        *      Hash table: for real service lookups
+        */
+       #define IP_VS_RTAB_BITS 4
+       #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+       #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+       struct list_head        rs_table[IP_VS_RTAB_SIZE];
+       /* ip_vs_app */
+       struct list_head        app_list;
+       struct mutex            app_mutex;
+       struct lock_class_key   app_key;        /* mutex debuging */
+
+       /* ip_vs_proto */
+       #define IP_VS_PROTO_TAB_SIZE    32      /* must be power of 2 */
+       struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+       /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+       #define TCP_APP_TAB_BITS        4
+       #define TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
+       #define TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
+       struct list_head        tcp_apps[TCP_APP_TAB_SIZE];
+       spinlock_t              tcp_app_lock;
+#endif
+       /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+       #define UDP_APP_TAB_BITS        4
+       #define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
+       #define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
+       struct list_head        udp_apps[UDP_APP_TAB_SIZE];
+       spinlock_t              udp_app_lock;
+#endif
+       /* ip_vs_proto_sctp */
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+       #define SCTP_APP_TAB_BITS       4
+       #define SCTP_APP_TAB_SIZE       (1 << SCTP_APP_TAB_BITS)
+       #define SCTP_APP_TAB_MASK       (SCTP_APP_TAB_SIZE - 1)
+       /* Hash table for SCTP application incarnations  */
+       struct list_head        sctp_apps[SCTP_APP_TAB_SIZE];
+       spinlock_t              sctp_app_lock;
+#endif
+       /* ip_vs_conn */
+       atomic_t                conn_count;      /*  connection counter */
+
+       /* ip_vs_ctl */
+       struct ip_vs_stats              *tot_stats;  /* Statistics & est. */
+       struct ip_vs_cpu_stats __percpu *cpustats;   /* Stats per cpu */
+       seqcount_t                      *ustats_seq; /* u64 read retry */
+
+       int                     num_services;    /* no of virtual services */
+       /* 1/rate drop and drop-entry variables */
+       struct delayed_work     defense_work;   /* Work handler */
+       int                     drop_rate;
+       int                     drop_counter;
+       atomic_t                dropentry;
+       /* locks in ctl.c */
+       spinlock_t              dropentry_lock;  /* drop entry handling */
+       spinlock_t              droppacket_lock; /* drop packet handling */
+       spinlock_t              securetcp_lock;  /* state and timeout tables */
+       rwlock_t                rs_lock;         /* real services table */
+       /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+       struct lock_class_key   ctl_key;        /* ctl_mutex debuging */
+       /* Trash for destinations */
+       struct list_head        dest_trash;
+       /* Service counters */
+       atomic_t                ftpsvc_counter;
+       atomic_t                nullsvc_counter;
+
+       /* sys-ctl struct */
+       struct ctl_table_header *sysctl_hdr;
+       struct ctl_table        *sysctl_tbl;
+       /* sysctl variables */
+       int                     sysctl_amemthresh;
+       int                     sysctl_am_droprate;
+       int                     sysctl_drop_entry;
+       int                     sysctl_drop_packet;
+       int                     sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+       int                     sysctl_conntrack;
+#endif
+       int                     sysctl_snat_reroute;
+       int                     sysctl_sync_ver;
+       int                     sysctl_cache_bypass;
+       int                     sysctl_expire_nodest_conn;
+       int                     sysctl_expire_quiescent_template;
+       int                     sysctl_sync_threshold[2];
+       int                     sysctl_nat_icmp_send;
+
+       /* ip_vs_lblc */
+       int                     sysctl_lblc_expiration;
+       struct ctl_table_header *lblc_ctl_header;
+       struct ctl_table        *lblc_ctl_table;
+       /* ip_vs_lblcr */
+       int                     sysctl_lblcr_expiration;
+       struct ctl_table_header *lblcr_ctl_header;
+       struct ctl_table        *lblcr_ctl_table;
+       /* ip_vs_est */
+       struct list_head        est_list;       /* estimator list */
+       spinlock_t              est_lock;
+       struct timer_list       est_timer;      /* Estimation timer */
+       /* ip_vs_sync */
+       struct list_head        sync_queue;
+       spinlock_t              sync_lock;
+       struct ip_vs_sync_buff  *sync_buff;
+       spinlock_t              sync_buff_lock;
+       struct sockaddr_in      sync_mcast_addr;
+       struct task_struct      *master_thread;
+       struct task_struct      *backup_thread;
+       int                     send_mesg_maxlen;
+       int                     recv_mesg_maxlen;
+       volatile int            sync_state;
+       volatile int            master_syncid;
+       volatile int            backup_syncid;
+       /* multicast interface name */
+       char                    master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+       char                    backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+       /* net name space ptr */
+       struct net              *net;            /* Needed by timer routines */
+};
+
+#endif /* IP_VS_H_ */
index d68c3f1..e2e2ef5 100644 (file)
@@ -43,7 +43,6 @@ struct netns_ipv4 {
        struct xt_table         *nat_table;
        struct hlist_head       *nat_bysource;
        unsigned int            nat_htable_size;
-       int                     nat_vmalloced;
 #endif
 
        int sysctl_icmp_echo_ignore_all;
index 160a407..16626a0 100644 (file)
@@ -31,10 +31,12 @@ enum qdisc_state_t {
  * following bits are only changed while qdisc lock is held
  */
 enum qdisc___state_t {
-       __QDISC___STATE_RUNNING,
+       __QDISC___STATE_RUNNING = 1,
+       __QDISC___STATE_THROTTLED = 2,
 };
 
 struct qdisc_size_table {
+       struct rcu_head         rcu;
        struct list_head        list;
        struct tc_sizespec      szopts;
        int                     refcnt;
@@ -46,14 +48,13 @@ struct Qdisc {
        struct sk_buff *        (*dequeue)(struct Qdisc *dev);
        unsigned                flags;
 #define TCQ_F_BUILTIN          1
-#define TCQ_F_THROTTLED                2
-#define TCQ_F_INGRESS          4
-#define TCQ_F_CAN_BYPASS       8
-#define TCQ_F_MQROOT           16
+#define TCQ_F_INGRESS          2
+#define TCQ_F_CAN_BYPASS       4
+#define TCQ_F_MQROOT           8
 #define TCQ_F_WARN_NONWC       (1 << 16)
        int                     padded;
        struct Qdisc_ops        *ops;
-       struct qdisc_size_table *stab;
+       struct qdisc_size_table __rcu *stab;
        struct list_head        list;
        u32                     handle;
        u32                     parent;
@@ -78,25 +79,43 @@ struct Qdisc {
        unsigned long           state;
        struct sk_buff_head     q;
        struct gnet_stats_basic_packed bstats;
-       unsigned long           __state;
+       unsigned int            __state;
        struct gnet_stats_queue qstats;
        struct rcu_head         rcu_head;
        spinlock_t              busylock;
 };
 
-static inline bool qdisc_is_running(struct Qdisc *qdisc)
+static inline bool qdisc_is_running(const struct Qdisc *qdisc)
 {
-       return test_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+       return (qdisc->__state & __QDISC___STATE_RUNNING) ? true : false;
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
-       return !__test_and_set_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+       if (qdisc_is_running(qdisc))
+               return false;
+       qdisc->__state |= __QDISC___STATE_RUNNING;
+       return true;
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-       __clear_bit(__QDISC___STATE_RUNNING, &qdisc->__state);
+       qdisc->__state &= ~__QDISC___STATE_RUNNING;
+}
+
+static inline bool qdisc_is_throttled(const struct Qdisc *qdisc)
+{
+       return (qdisc->__state & __QDISC___STATE_THROTTLED) ? true : false;
+}
+
+static inline void qdisc_throttled(struct Qdisc *qdisc)
+{
+       qdisc->__state |= __QDISC___STATE_THROTTLED;
+}
+
+static inline void qdisc_unthrottled(struct Qdisc *qdisc)
+{
+       qdisc->__state &= ~__QDISC___STATE_THROTTLED;
 }
 
 struct Qdisc_class_ops {
@@ -331,8 +350,8 @@ extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                                 struct Qdisc_ops *ops);
 extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
                                       struct Qdisc_ops *ops, u32 parentid);
-extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
-                                  struct qdisc_size_table *stab);
+extern void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+                                     const struct qdisc_size_table *stab);
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
@@ -411,12 +430,20 @@ enum net_xmit_qdisc_t {
 #define net_xmit_drop_count(e) (1)
 #endif
 
-static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
+                                          const struct Qdisc *sch)
 {
 #ifdef CONFIG_NET_SCHED
-       if (sch->stab)
-               qdisc_calculate_pkt_len(skb, sch->stab);
+       struct qdisc_size_table *stab = rcu_dereference_bh(sch->stab);
+
+       if (stab)
+               __qdisc_calculate_pkt_len(skb, stab);
 #endif
+}
+
+static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+       qdisc_calculate_pkt_len(skb, sch);
        return sch->enqueue(skb, sch);
 }
 
index d884d26..ba6465b 100644 (file)
@@ -1189,7 +1189,7 @@ extern void sk_filter_release_rcu(struct rcu_head *rcu);
 static inline void sk_filter_release(struct sk_filter *fp)
 {
        if (atomic_dec_and_test(&fp->refcnt))
-               call_rcu_bh(&fp->rcu, sk_filter_release_rcu);
+               call_rcu(&fp->rcu, sk_filter_release_rcu);
 }
 
 static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
index e495624..162e88e 100644 (file)
@@ -74,6 +74,8 @@ static int    audit_initialized;
 int            audit_enabled;
 int            audit_ever_enabled;
 
+EXPORT_SYMBOL_GPL(audit_enabled);
+
 /* Default state when kernel boots without any parameters. */
 static int     audit_default;
 
index 17c5ba7..29a54cc 100644 (file)
@@ -59,7 +59,6 @@
                                                 * safely advertise a maxsize
                                                 * of 64k */
 
-#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)
 /**
  * struct p9_trans_rdma - RDMA transport instance
  *
index 50a46af..2ed0056 100644 (file)
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_ip6.h>
 
-struct tcpudphdr {
-       __be16 src;
-       __be16 dst;
+union pkthdr {
+       struct {
+               __be16 src;
+               __be16 dst;
+       } tcpudphdr;
+       struct {
+               u8 type;
+               u8 code;
+       } icmphdr;
 };
 
 static bool
@@ -33,8 +39,8 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
        const struct ebt_ip6_info *info = par->matchinfo;
        const struct ipv6hdr *ih6;
        struct ipv6hdr _ip6h;
-       const struct tcpudphdr *pptr;
-       struct tcpudphdr _ports;
+       const union pkthdr *pptr;
+       union pkthdr _pkthdr;
 
        ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
        if (ih6 == NULL)
@@ -56,26 +62,34 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
                        return false;
                if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
                        return false;
-               if (!(info->bitmask & EBT_IP6_DPORT) &&
-                   !(info->bitmask & EBT_IP6_SPORT))
+               if (!(info->bitmask & ( EBT_IP6_DPORT |
+                                       EBT_IP6_SPORT | EBT_IP6_ICMP6)))
                        return true;
-               pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
-                                         &_ports);
+
+               /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
+               pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr),
+                                         &_pkthdr);
                if (pptr == NULL)
                        return false;
                if (info->bitmask & EBT_IP6_DPORT) {
-                       u32 dst = ntohs(pptr->dst);
+                       u16 dst = ntohs(pptr->tcpudphdr.dst);
                        if (FWINV(dst < info->dport[0] ||
                                  dst > info->dport[1], EBT_IP6_DPORT))
                                return false;
                }
                if (info->bitmask & EBT_IP6_SPORT) {
-                       u32 src = ntohs(pptr->src);
+                       u16 src = ntohs(pptr->tcpudphdr.src);
                        if (FWINV(src < info->sport[0] ||
                                  src > info->sport[1], EBT_IP6_SPORT))
                        return false;
                }
-               return true;
+               if ((info->bitmask & EBT_IP6_ICMP6) &&
+                    FWINV(pptr->icmphdr.type < info->icmpv6_type[0] ||
+                          pptr->icmphdr.type > info->icmpv6_type[1] ||
+                          pptr->icmphdr.code < info->icmpv6_code[0] ||
+                          pptr->icmphdr.code > info->icmpv6_code[1],
+                                                       EBT_IP6_ICMP6))
+                       return false;
        }
        return true;
 }
@@ -103,6 +117,14 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
                return -EINVAL;
        if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
                return -EINVAL;
+       if (info->bitmask & EBT_IP6_ICMP6) {
+               if ((info->invflags & EBT_IP6_PROTO) ||
+                    info->protocol != IPPROTO_ICMPV6)
+                       return -EINVAL;
+               if (info->icmpv6_type[0] > info->icmpv6_type[1] ||
+                   info->icmpv6_code[0] > info->icmpv6_code[1])
+                       return -EINVAL;
+       }
        return 0;
 }
 
index 16df053..5f1825d 100644 (file)
@@ -1764,6 +1764,7 @@ static int compat_table_info(const struct ebt_table_info *info,
 
        newinfo->entries_size = size;
 
+       xt_compat_init_offsets(AF_INET, info->nentries);
        return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info,
                                                        entries, newinfo);
 }
index c665de7..f1f98d9 100644 (file)
 #include <asm/atomic.h>
 
 #define MAX_PHY_LAYERS 7
-#define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
-#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
index d3ed264..27dab26 100644 (file)
@@ -18,7 +18,6 @@
 #define DGM_CMD_BIT  0x80
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
-#define DGM_CTRL_PKT_SIZE 1
 #define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
index 9297f7d..8303fe3 100644 (file)
@@ -25,7 +25,6 @@ struct cfserl {
        spinlock_t sync;
        bool usestx;
 };
-#define STXLEN(layr) (layr->usestx ? 1 : 0)
 
 static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
index efad410..315c0d6 100644 (file)
@@ -20,7 +20,7 @@
 #define UTIL_REMOTE_SHUTDOWN 0x82
 #define UTIL_FLOW_OFF 0x81
 #define UTIL_FLOW_ON  0x80
-#define UTIL_CTRL_PKT_SIZE 1
+
 static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
index 3b425b1..c3b1dec 100644 (file)
@@ -17,7 +17,7 @@
 #define VEI_FLOW_OFF 0x81
 #define VEI_FLOW_ON  0x80
 #define VEI_SET_PIN  0x82
-#define VEI_CTRL_PKT_SIZE 1
+
 #define container_obj(layr) container_of(layr, struct cfsrvl, layer)
 
 static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
index 24ea2d7..d162ba8 100644 (file)
@@ -1286,7 +1286,7 @@ static int __dev_close(struct net_device *dev)
        return __dev_close_many(&single);
 }
 
-int dev_close_many(struct list_head *head)
+static int dev_close_many(struct list_head *head)
 {
        struct net_device *dev, *tmp;
        LIST_HEAD(tmp_list);
@@ -1594,6 +1594,48 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
        rcu_read_unlock();
 }
 
+/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+ * @dev: Network device
+ * @txq: number of queues available
+ *
+ * If real_num_tx_queues is changed the tc mappings may no longer be
+ * valid. To resolve this verify the tc mapping remains valid and if
+ * not NULL the mapping. With no priorities mapping to this
+ * offset/count pair it will no longer be used. In the worst case TC0
+ * is invalid nothing can be done so disable priority mappings. If is
+ * expected that drivers will fix this mapping if they can before
+ * calling netif_set_real_num_tx_queues.
+ */
+static void netif_setup_tc(struct net_device *dev, unsigned int txq)
+{
+       int i;
+       struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+
+       /* If TC0 is invalidated disable TC mapping */
+       if (tc->offset + tc->count > txq) {
+               pr_warning("Number of in use tx queues changed "
+                          "invalidating tc mappings. Priority "
+                          "traffic classification disabled!\n");
+               dev->num_tc = 0;
+               return;
+       }
+
+       /* Invalidated prio to tc mappings set to TC0 */
+       for (i = 1; i < TC_BITMASK + 1; i++) {
+               int q = netdev_get_prio_tc_map(dev, i);
+
+               tc = &dev->tc_to_txq[q];
+               if (tc->offset + tc->count > txq) {
+                       pr_warning("Number of in use tx queues "
+                                  "changed. Priority %i to tc "
+                                  "mapping %i is no longer valid "
+                                  "setting map to 0\n",
+                                  i, q);
+                       netdev_set_prio_tc_map(dev, i, 0);
+               }
+       }
+}
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -1613,6 +1655,9 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
                if (rc)
                        return rc;
 
+               if (dev->num_tc)
+                       netif_setup_tc(dev, txq);
+
                if (txq < dev->real_num_tx_queues)
                        qdisc_reset_all_tx_gt(dev, txq);
        }
@@ -2162,6 +2207,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
                  unsigned int num_tx_queues)
 {
        u32 hash;
+       u16 qoffset = 0;
+       u16 qcount = num_tx_queues;
 
        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
@@ -2170,13 +2217,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
                return hash;
        }
 
+       if (dev->num_tc) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+               qoffset = dev->tc_to_txq[tc].offset;
+               qcount = dev->tc_to_txq[tc].count;
+       }
+
        if (skb->sk && skb->sk->sk_hash)
                hash = skb->sk->sk_hash;
        else
                hash = (__force u16) skb->protocol ^ skb->rxhash;
        hash = jhash_1word(hash, hashrnd);
 
-       return (u16) (((u64) hash * num_tx_queues) >> 32);
+       return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 
@@ -2273,15 +2326,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                                 struct netdev_queue *txq)
 {
        spinlock_t *root_lock = qdisc_lock(q);
-       bool contended = qdisc_is_running(q);
+       bool contended;
        int rc;
 
+       qdisc_skb_cb(skb)->pkt_len = skb->len;
+       qdisc_calculate_pkt_len(skb, q);
        /*
         * Heuristic to force contended enqueues to serialize on a
         * separate lock before trying to get qdisc main lock.
         * This permits __QDISC_STATE_RUNNING owner to get the lock more often
         * and dequeue packets faster.
         */
+       contended = qdisc_is_running(q);
        if (unlikely(contended))
                spin_lock(&q->busylock);
 
@@ -2299,7 +2355,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
                        skb_dst_force(skb);
 
-               qdisc_skb_cb(skb)->pkt_len = skb->len;
                qdisc_bstats_update(q, skb);
 
                if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
@@ -2314,7 +2369,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                rc = NET_XMIT_SUCCESS;
        } else {
                skb_dst_force(skb);
-               rc = qdisc_enqueue_root(skb, q);
+               rc = q->enqueue(skb, q) & NET_XMIT_MASK;
                if (qdisc_run_begin(q)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
@@ -4572,6 +4627,17 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 }
 EXPORT_SYMBOL(dev_set_mtu);
 
+/**
+ *     dev_set_group - Change group this device belongs to
+ *     @dev: device
+ *     @new_group: group this device should belong to
+ */
+void dev_set_group(struct net_device *dev, int new_group)
+{
+       dev->group = new_group;
+}
+EXPORT_SYMBOL(dev_set_group);
+
 /**
  *     dev_set_mac_address - Change Media Access Control Address
  *     @dev: device
@@ -5679,6 +5745,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev->priv_flags = IFF_XMIT_DST_RELEASE;
        setup(dev);
        strcpy(dev->name, name);
+       dev->group = INIT_NETDEV_GROUP;
        return dev;
 
 free_pcpu:
index afc5837..232b187 100644 (file)
@@ -142,14 +142,14 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
        if (err)
                return err;
 
-       rcu_read_lock_bh();
-       filter = rcu_dereference_bh(sk->sk_filter);
+       rcu_read_lock();
+       filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                unsigned int pkt_len = sk_run_filter(skb, filter->insns);
 
                err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
        }
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
 
        return err;
 }
index 60a9029..799f06e 100644 (file)
@@ -316,7 +316,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
 {
        size_t size = entries * sizeof(struct neighbour *);
        struct neigh_hash_table *ret;
-       struct neighbour **buckets;
+       struct neighbour __rcu **buckets;
 
        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
        if (!ret)
@@ -324,14 +324,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
        if (size <= PAGE_SIZE)
                buckets = kzalloc(size, GFP_ATOMIC);
        else
-               buckets = (struct neighbour **)
+               buckets = (struct neighbour __rcu **)
                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                           get_order(size));
        if (!buckets) {
                kfree(ret);
                return NULL;
        }
-       rcu_assign_pointer(ret->hash_buckets, buckets);
+       ret->hash_buckets = buckets;
        ret->hash_mask = entries - 1;
        get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
        return ret;
@@ -343,7 +343,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
                                                    struct neigh_hash_table,
                                                    rcu);
        size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
-       struct neighbour **buckets = nht->hash_buckets;
+       struct neighbour __rcu **buckets = nht->hash_buckets;
 
        if (size <= PAGE_SIZE)
                kfree(buckets);
@@ -1540,7 +1540,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
                panic("cannot create neighbour proc dir entry");
 #endif
 
-       tbl->nht = neigh_hash_alloc(8);
+       RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(8));
 
        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
@@ -1602,7 +1602,8 @@ int neigh_table_clear(struct neigh_table *tbl)
        }
        write_unlock(&neigh_tbl_lock);
 
-       call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
+       call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
+                neigh_hash_free_rcu);
        tbl->nht = NULL;
 
        kfree(tbl->phash_buckets);
index 750db57..c668f8c 100644 (file)
@@ -868,6 +868,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
                   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
        NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
        NLA_PUT_U32(skb, IFLA_MTU, dev->mtu);
+       NLA_PUT_U32(skb, IFLA_GROUP, dev->group);
 
        if (dev->ifindex != dev->iflink)
                NLA_PUT_U32(skb, IFLA_LINK, dev->iflink);
@@ -1265,6 +1266,11 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
                modified = 1;
        }
 
+       if (tb[IFLA_GROUP]) {
+               dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
+               modified = 1;
+       }
+
        /*
         * Interface selected by interface index but interface
         * name provided implies that a name change has been
@@ -1542,6 +1548,8 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net,
                set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
        if (tb[IFLA_LINKMODE])
                dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
+       if (tb[IFLA_GROUP])
+               dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
 
        return dev;
 
@@ -1552,6 +1560,24 @@ err:
 }
 EXPORT_SYMBOL(rtnl_create_link);
 
+static int rtnl_group_changelink(struct net *net, int group,
+               struct ifinfomsg *ifm,
+               struct nlattr **tb)
+{
+       struct net_device *dev;
+       int err;
+
+       for_each_netdev(net, dev) {
+               if (dev->group == group) {
+                       err = do_setlink(dev, ifm, tb, NULL, 0);
+                       if (err < 0)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
 static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 {
        struct net *net = sock_net(skb->sk);
@@ -1579,10 +1605,12 @@ replay:
        ifm = nlmsg_data(nlh);
        if (ifm->ifi_index > 0)
                dev = __dev_get_by_index(net, ifm->ifi_index);
-       else if (ifname[0])
-               dev = __dev_get_by_name(net, ifname);
-       else
-               dev = NULL;
+       else {
+               if (ifname[0])
+                       dev = __dev_get_by_name(net, ifname);
+               else
+                       dev = NULL;
+       }
 
        err = validate_linkmsg(dev, tb);
        if (err < 0)
@@ -1646,8 +1674,13 @@ replay:
                        return do_setlink(dev, ifm, tb, ifname, modified);
                }
 
-               if (!(nlh->nlmsg_flags & NLM_F_CREATE))
+               if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+                       if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+                               return rtnl_group_changelink(net,
+                                               nla_get_u32(tb[IFLA_GROUP]),
+                                               ifm, tb);
                        return -ENODEV;
+               }
 
                if (ifm->ifi_index)
                        return -EOPNOTSUPP;
index f2abd37..b66600b 100644 (file)
@@ -59,7 +59,6 @@ struct dn_hash
 };
 
 #define dz_key_0(key)          ((key).datum = 0)
-#define dz_prefix(key,dz)      ((key).datum)
 
 #define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
        for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
index a5a1050..8949a05 100644 (file)
@@ -140,6 +140,9 @@ config IP_ROUTE_VERBOSE
          handled by the klogd daemon which is responsible for kernel messages
          ("man klogd").
 
+config IP_ROUTE_CLASSID
+       bool
+
 config IP_PNP
        bool "IP: kernel level autoconfiguration"
        help
@@ -657,4 +660,3 @@ config TCP_MD5SIG
          on the Internet.
 
          If unsure, say N.
-
index 7981a24..9cefe72 100644 (file)
@@ -41,12 +41,12 @@ struct fib4_rule {
        __be32                  srcmask;
        __be32                  dst;
        __be32                  dstmask;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        u32                     tclassid;
 #endif
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 u32 fib_rules_tclass(struct fib_result *res)
 {
        return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0;
@@ -165,7 +165,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
        if (frh->dst_len)
                rule4->dst = nla_get_be32(tb[FRA_DST]);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (tb[FRA_FLOW])
                rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
 #endif
@@ -195,7 +195,7 @@ static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
        if (frh->tos && (rule4->tos != frh->tos))
                return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW])))
                return 0;
 #endif
@@ -224,7 +224,7 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
        if (rule4->src_len)
                NLA_PUT_BE32(skb, FRA_SRC, rule4->src);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (rule4->tclassid)
                NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid);
 #endif
index 12d3dc3..9aff11d 100644 (file)
@@ -200,7 +200,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
                    nh->nh_weight != onh->nh_weight ||
 #endif
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
                    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
@@ -422,7 +422,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 
                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
                        nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                        nla = nla_find(attrs, attrlen, RTA_FLOW);
                        nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
 #endif
@@ -476,7 +476,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
                        nla = nla_find(attrs, attrlen, RTA_GATEWAY);
                        if (nla && nla_get_be32(nla) != nh->nh_gw)
                                return 1;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                        nla = nla_find(attrs, attrlen, RTA_FLOW);
                        if (nla && nla_get_u32(nla) != nh->nh_tclassid)
                                return 1;
@@ -779,7 +779,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
                        goto err_inval;
                if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
                        goto err_inval;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
                        goto err_inval;
 #endif
@@ -792,7 +792,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
                nh->nh_oif = cfg->fc_oif;
                nh->nh_gw = cfg->fc_gw;
                nh->nh_flags = cfg->fc_flags;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                nh->nh_tclassid = cfg->fc_flow;
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1002,7 +1002,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
                if (fi->fib_nh->nh_oif)
                        NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                if (fi->fib_nh[0].nh_tclassid)
                        NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
 #endif
@@ -1027,7 +1027,7 @@ int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 
                        if (nh->nh_gw)
                                NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                        if (nh->nh_tclassid)
                                NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
 #endif
index d859bcc..d7b2b09 100644 (file)
@@ -340,7 +340,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
                }
        }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (unlikely(skb_dst(skb)->tclassid)) {
                struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
                u32 idx = skb_dst(skb)->tclassid;
index babd1a2..f926a31 100644 (file)
@@ -206,8 +206,9 @@ config IP_NF_TARGET_REDIRECT
 
 config NF_NAT_SNMP_BASIC
        tristate "Basic SNMP-ALG support"
-       depends on NF_NAT
+       depends on NF_CONNTRACK_SNMP && NF_NAT
        depends on NETFILTER_ADVANCED
+       default NF_NAT && NF_CONNTRACK_SNMP
        ---help---
 
          This module implements an Application Layer Gateway (ALG) for
index e855fff..e95054c 100644 (file)
@@ -866,6 +866,7 @@ static int compat_table_info(const struct xt_table_info *info,
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(NFPROTO_ARP, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                if (ret != 0)
@@ -1333,6 +1334,7 @@ static int translate_compat_table(const char *name,
        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;
        xt_compat_lock(NFPROTO_ARP);
+       xt_compat_init_offsets(NFPROTO_ARP, number);
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter0, entry0, total_size) {
                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
index 652efea..ef7d7b9 100644 (file)
@@ -1063,6 +1063,7 @@ static int compat_table_info(const struct xt_table_info *info,
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(AF_INET, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                if (ret != 0)
@@ -1664,6 +1665,7 @@ translate_compat_table(struct net *net,
        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;
        xt_compat_lock(AF_INET);
+       xt_compat_init_offsets(AF_INET, number);
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter0, entry0, total_size) {
                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
index 1e26a48..403ca57 100644 (file)
@@ -300,13 +300,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
         * that the ->target() function isn't called after ->destroy() */
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct == NULL) {
-               pr_info("no conntrack!\n");
-                       /* FIXME: need to drop invalid ones, since replies
-                        * to outgoing connections of other nodes will be
-                        * marked as INVALID */
+       if (ct == NULL)
                return NF_DROP;
-       }
 
        /* special case: ICMP error handling. conntrack distinguishes between
         * error messages (RELATED) and information requests (see below) */
index 72ffc8f..d76d6c9 100644 (file)
@@ -442,8 +442,7 @@ ipt_log_packet(u_int8_t pf,
        }
 #endif
 
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                dump_mac_header(m, loginfo, skb);
 
        dump_packet(m, loginfo, skb, 0);
index 294a2a3..aef5d1f 100644 (file)
@@ -60,7 +60,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
        ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
                           dev_net(out)->ipv4.iptable_mangle);
        /* Reroute for ANY change. */
-       if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+       if (ret != NF_DROP && ret != NF_STOLEN) {
                iph = ip_hdr(skb);
 
                if (iph->saddr != saddr ||
index 63f60fc..5585980 100644 (file)
@@ -20,6 +20,7 @@
 #include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
 
 struct ct_iter_state {
        struct seq_net_private p;
@@ -35,7 +36,8 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
        for (st->bucket = 0;
             st->bucket < net->ct.htable_size;
             st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                if (!is_a_nulls(n))
                        return n;
        }
@@ -48,13 +50,14 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
        while (is_a_nulls(head)) {
                if (likely(get_nulls_value(head) == st->bucket)) {
                        if (++st->bucket >= net->ct.htable_size)
                                return NULL;
                }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
        }
        return head;
 }
@@ -217,7 +220,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
        struct hlist_node *n;
 
        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
@@ -230,11 +234,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(
+                       hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
 }
index 0f23b3f..703f366 100644 (file)
@@ -44,13 +44,13 @@ static unsigned int help(struct sk_buff *skb,
 
        /* Try to get same port: if not, try to change it. */
        for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
-               int ret;
+               int res;
 
                exp->tuple.dst.u.tcp.port = htons(port);
-               ret = nf_ct_expect_related(exp);
-               if (ret == 0)
+               res = nf_ct_expect_related(exp);
+               if (res == 0)
                        break;
-               else if (ret != -EBUSY) {
+               else if (res != -EBUSY) {
                        port = 0;
                        break;
                }
index c04787c..21bcf47 100644 (file)
@@ -221,7 +221,14 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
           manips not an issue.  */
        if (maniptype == IP_NAT_MANIP_SRC &&
            !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-               if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
+               /* try the original tuple first */
+               if (in_range(orig_tuple, range)) {
+                       if (!nf_nat_used_tuple(orig_tuple, ct)) {
+                               *tuple = *orig_tuple;
+                               return;
+                       }
+               } else if (find_appropriate_src(net, zone, orig_tuple, tuple,
+                          range)) {
                        pr_debug("get_unique_tuple: Found current src map\n");
                        if (!nf_nat_used_tuple(tuple, ct))
                                return;
@@ -266,7 +273,6 @@ nf_nat_setup_info(struct nf_conn *ct,
        struct net *net = nf_ct_net(ct);
        struct nf_conntrack_tuple curr_tuple, new_tuple;
        struct nf_conn_nat *nat;
-       int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
 
        /* nat helper or nfctnetlink also setup binding */
        nat = nfct_nat(ct);
@@ -306,8 +312,7 @@ nf_nat_setup_info(struct nf_conn *ct,
                        ct->status |= IPS_DST_NAT;
        }
 
-       /* Place in source hash if this is the first time. */
-       if (have_to_hash) {
+       if (maniptype == IP_NAT_MANIP_SRC) {
                unsigned int srchash;
 
                srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -323,9 +328,9 @@ nf_nat_setup_info(struct nf_conn *ct,
 
        /* It's done. */
        if (maniptype == IP_NAT_MANIP_DST)
-               set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_DST_NAT_DONE;
        else
-               set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
+               ct->status |= IPS_SRC_NAT_DONE;
 
        return NF_ACCEPT;
 }
@@ -502,7 +507,10 @@ int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
        int ret = 0;
 
        spin_lock_bh(&nf_nat_lock);
-       if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
+       if (rcu_dereference_protected(
+                       nf_nat_protos[proto->protonum],
+                       lockdep_is_held(&nf_nat_lock)
+                       ) != &nf_nat_unknown_protocol) {
                ret = -EBUSY;
                goto out;
        }
@@ -532,7 +540,7 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
        if (nat == NULL || nat->ct == NULL)
                return;
 
-       NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
+       NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
 
        spin_lock_bh(&nf_nat_lock);
        hlist_del_rcu(&nat->bysource);
@@ -545,11 +553,10 @@ static void nf_nat_move_storage(void *new, void *old)
        struct nf_conn_nat *old_nat = old;
        struct nf_conn *ct = old_nat->ct;
 
-       if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
+       if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
                return;
 
        spin_lock_bh(&nf_nat_lock);
-       new_nat->ct = ct;
        hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
        spin_unlock_bh(&nf_nat_lock);
 }
@@ -679,8 +686,7 @@ static int __net_init nf_nat_net_init(struct net *net)
 {
        /* Leave them the same for the moment. */
        net->ipv4.nat_htable_size = net->ct.htable_size;
-       net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
-                                                      &net->ipv4.nat_vmalloced, 0);
+       net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
        if (!net->ipv4.nat_bysource)
                return -ENOMEM;
        return 0;
@@ -702,8 +708,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 {
        nf_ct_iterate_cleanup(net, &clean_nat, NULL);
        synchronize_rcu();
-       nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-                            net->ipv4.nat_htable_size);
+       nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
index ee5f419..8812a02 100644 (file)
@@ -54,6 +54,7 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -1310,9 +1311,9 @@ static int __init nf_nat_snmp_basic_init(void)
 {
        int ret = 0;
 
-       ret = nf_conntrack_helper_register(&snmp_helper);
-       if (ret < 0)
-               return ret;
+       BUG_ON(nf_nat_snmp_hook != NULL);
+       rcu_assign_pointer(nf_nat_snmp_hook, help);
+
        ret = nf_conntrack_helper_register(&snmp_trap_helper);
        if (ret < 0) {
                nf_conntrack_helper_unregister(&snmp_helper);
@@ -1323,7 +1324,7 @@ static int __init nf_nat_snmp_basic_init(void)
 
 static void __exit nf_nat_snmp_basic_fini(void)
 {
-       nf_conntrack_helper_unregister(&snmp_helper);
+       rcu_assign_pointer(nf_nat_snmp_hook, NULL);
        nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
index 351dc4e..3e5b7cc 100644 (file)
@@ -514,7 +514,7 @@ static const struct file_operations rt_cpu_seq_fops = {
        .release = seq_release,
 };
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static int rt_acct_proc_show(struct seq_file *m, void *v)
 {
        struct ip_rt_acct *dst, *src;
@@ -567,14 +567,14 @@ static int __net_init ip_rt_do_proc_init(struct net *net)
        if (!pde)
                goto err2;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
        if (!pde)
                goto err3;
 #endif
        return 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 err3:
        remove_proc_entry("rt_cache", net->proc_net_stat);
 #endif
@@ -588,7 +588,7 @@ static void __net_exit ip_rt_do_proc_exit(struct net *net)
 {
        remove_proc_entry("rt_cache", net->proc_net_stat);
        remove_proc_entry("rt_cache", net->proc_net);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        remove_proc_entry("rt_acct", net->proc_net);
 #endif
 }
@@ -1775,7 +1775,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
        memcpy(addr, &src, 4);
 }
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
        if (!(rt->dst.tclassid & 0xFFFF))
@@ -1825,7 +1825,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
                    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
                        rt->rt_gateway = FIB_RES_GW(*res);
                dst_import_metrics(dst, fi->fib_metrics);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                dst->tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
        }
@@ -1835,7 +1835,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
        if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
                dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        set_class_tag(rt, fib_rules_tclass(res));
 #endif
@@ -1891,7 +1891,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        rth->fl.mark    = skb->mark;
        rth->fl.fl4_src = saddr;
        rth->rt_src     = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        rth->dst.tclassid = itag;
 #endif
        rth->rt_iif     =
@@ -2208,7 +2208,7 @@ local_input:
        rth->fl.mark    = skb->mark;
        rth->fl.fl4_src = saddr;
        rth->rt_src     = saddr;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        rth->dst.tclassid = itag;
 #endif
        rth->rt_iif     =
@@ -2828,7 +2828,7 @@ static int rt_fill_info(struct net *net,
        }
        if (rt->dst.dev)
                NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (rt->dst.tclassid)
                NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
@@ -3249,9 +3249,9 @@ static __net_initdata struct pernet_operations rt_genid_ops = {
 };
 
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
-#endif /* CONFIG_NET_CLS_ROUTE */
+#endif /* CONFIG_IP_ROUTE_CLASSID */
 
 static __initdata unsigned long rhash_entries;
 static int __init set_rhash_entries(char *str)
@@ -3267,7 +3267,7 @@ int __init ip_rt_init(void)
 {
        int rc = 0;
 
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
        if (!ip_rt_acct)
                panic("IP: failed to allocate ip_rt_acct\n");
index 7d227c6..47b7b8d 100644 (file)
@@ -1076,6 +1076,7 @@ static int compat_table_info(const struct xt_table_info *info,
        memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
        newinfo->initial_entries = 0;
        loc_cpu_entry = info->entries[raw_smp_processor_id()];
+       xt_compat_init_offsets(AF_INET6, info->number);
        xt_entry_foreach(iter, loc_cpu_entry, info->size) {
                ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
                if (ret != 0)
@@ -1679,6 +1680,7 @@ translate_compat_table(struct net *net,
        duprintf("translate_compat_table: size %u\n", info->size);
        j = 0;
        xt_compat_lock(AF_INET6);
+       xt_compat_init_offsets(AF_INET6, number);
        /* Walk through entries, checking offsets. */
        xt_entry_foreach(iter0, entry0, total_size) {
                ret = check_compat_entry_size_and_hooks(iter0, info, &size,
index 09c8889..05027b7 100644 (file)
@@ -452,8 +452,7 @@ ip6t_log_packet(u_int8_t pf,
               in ? in->name : "",
               out ? out->name : "");
 
-       /* MAC logging for input path only. */
-       if (in && !out)
+       if (in != NULL)
                dump_mac_header(m, loginfo, skb);
 
        dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
index 79d43aa..0857272 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 
 
 struct nf_ct_frag6_skb_cb
@@ -73,7 +74,7 @@ static struct inet_frags nf_frags;
 static struct netns_frags nf_init_frags;
 
 #ifdef CONFIG_SYSCTL
-struct ctl_table nf_ct_frag6_sysctl_table[] = {
+static struct ctl_table nf_ct_frag6_sysctl_table[] = {
        {
                .procname       = "nf_conntrack_frag6_timeout",
                .data           = &nf_init_frags.timeout,
index 86c3952..2bc6cd7 100644 (file)
@@ -123,18 +123,18 @@ static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
 }
 
 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
-static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
+typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb);
 
-int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
-                                          struct sk_buff *skb))
+static mh_filter_t __rcu *mh_filter __read_mostly;
+
+int rawv6_mh_filter_register(mh_filter_t filter)
 {
        rcu_assign_pointer(mh_filter, filter);
        return 0;
 }
 EXPORT_SYMBOL(rawv6_mh_filter_register);
 
-int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
-                                            struct sk_buff *skb))
+int rawv6_mh_filter_unregister(mh_filter_t filter)
 {
        rcu_assign_pointer(mh_filter, NULL);
        synchronize_rcu();
@@ -192,10 +192,10 @@ static int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
                         * policy is placed in rawv6_rcv() because it is
                         * required for each socket.
                         */
-                       int (*filter)(struct sock *sock, struct sk_buff *skb);
+                       mh_filter_t *filter;
 
                        filter = rcu_dereference(mh_filter);
-                       filtered = filter ? filter(sk, skb) : 0;
+                       filtered = filter ? (*filter)(sk, skb) : 0;
                        break;
                }
 #endif
index 8ce38f1..b1599a3 100644 (file)
@@ -412,7 +412,7 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
 
        p = container_of(head, struct ip_tunnel_prl_entry, rcu_head);
        do {
-               n = p->next;
+               n = rcu_dereference_protected(p->next, 1);
                kfree(p);
                p = n;
        } while (p);
@@ -421,15 +421,17 @@ static void prl_list_destroy_rcu(struct rcu_head *head)
 static int
 ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 {
-       struct ip_tunnel_prl_entry *x, **p;
+       struct ip_tunnel_prl_entry *x;
+       struct ip_tunnel_prl_entry __rcu **p;
        int err = 0;
 
        ASSERT_RTNL();
 
        if (a && a->addr != htonl(INADDR_ANY)) {
-               for (p = &t->prl; *p; p = &(*p)->next) {
-                       if ((*p)->addr == a->addr) {
-                               x = *p;
+               for (p = &t->prl;
+                    (x = rtnl_dereference(*p)) != NULL;
+                    p = &x->next) {
+                       if (x->addr == a->addr) {
                                *p = x->next;
                                call_rcu(&x->rcu_head, prl_entry_destroy_rcu);
                                t->prl_count--;
@@ -438,9 +440,9 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
                }
                err = -ENXIO;
        } else {
-               if (t->prl) {
+               x = rtnl_dereference(t->prl);
+               if (x) {
                        t->prl_count = 0;
-                       x = t->prl;
                        call_rcu(&x->rcu_head, prl_list_destroy_rcu);
                        t->prl = NULL;
                }
@@ -1179,7 +1181,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
        if (!dev->tstats)
                return -ENOMEM;
        dev_hold(dev);
-       sitn->tunnels_wc[0]     = tunnel;
+       rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
        return 0;
 }
 
@@ -1196,11 +1198,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea
        for (prio = 1; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
-                       struct ip_tunnel *t = sitn->tunnels[prio][h];
+                       struct ip_tunnel *t;
 
+                       t = rtnl_dereference(sitn->tunnels[prio][h]);
                        while (t != NULL) {
                                unregister_netdevice_queue(t->dev, head);
-                               t = t->next;
+                               t = rtnl_dereference(t->next);
                        }
                }
        }
index 1534f2b..faf7412 100644 (file)
@@ -85,6 +85,17 @@ config NF_CONNTRACK_EVENTS
 
          If unsure, say `N'.
 
+config NF_CONNTRACK_TIMESTAMP
+       bool  'Connection tracking timestamping'
+       depends on NETFILTER_ADVANCED
+       help
+         This option enables support for connection tracking timestamping.
+         This allows you to store the flow start-time and to obtain
+         the flow-stop time (once it has been destroyed) via Connection
+         tracking events.
+
+         If unsure, say `N'.
+
 config NF_CT_PROTO_DCCP
        tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
        depends on EXPERIMENTAL
@@ -185,9 +196,13 @@ config NF_CONNTRACK_IRC
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_BROADCAST
+       tristate
+
 config NF_CONNTRACK_NETBIOS_NS
        tristate "NetBIOS name service protocol support"
        depends on NETFILTER_ADVANCED
+       select NF_CONNTRACK_BROADCAST
        help
          NetBIOS name service requests are sent as broadcast messages from an
          unprivileged port and responded to with unicast messages to the
@@ -204,6 +219,21 @@ config NF_CONNTRACK_NETBIOS_NS
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CONNTRACK_SNMP
+       tristate "SNMP service protocol support"
+       depends on NETFILTER_ADVANCED
+       select NF_CONNTRACK_BROADCAST
+       help
+         SNMP service requests are sent as broadcast messages from an
+         unprivileged port and responded to with unicast messages to the
+         same port. This make them hard to firewall properly because connection
+         tracking doesn't deal with broadcasts. This helper tracks locally
+         originating SNMP service requests and the corresponding
+         responses. It relies on correct IP address configuration, specifically
+         netmask and broadcast address.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 config NF_CONNTRACK_PPTP
        tristate "PPtP protocol support"
        depends on NETFILTER_ADVANCED
@@ -326,6 +356,16 @@ config NETFILTER_XT_CONNMARK
 
 comment "Xtables targets"
 
+config NETFILTER_XT_TARGET_AUDIT
+       tristate "AUDIT target support"
+       depends on AUDIT
+       depends on NETFILTER_ADVANCED
+       ---help---
+         This option adds a 'AUDIT' target, which can be used to create
+         audit records for packets dropped/accepted.
+
+         To compileit as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_TARGET_CHECKSUM
        tristate "CHECKSUM target support"
        depends on IP_NF_MANGLE || IP6_NF_MANGLE
@@ -477,6 +517,7 @@ config NETFILTER_XT_TARGET_NFLOG
 config NETFILTER_XT_TARGET_NFQUEUE
        tristate '"NFQUEUE" target Support'
        depends on NETFILTER_ADVANCED
+       select NETFILTER_NETLINK_QUEUE
        help
          This target replaced the old obsolete QUEUE target.
 
@@ -886,7 +927,7 @@ config NETFILTER_XT_MATCH_RATEEST
 config NETFILTER_XT_MATCH_REALM
        tristate  '"realm" match support'
        depends on NETFILTER_ADVANCED
-       select NET_CLS_ROUTE
+       select IP_ROUTE_CLASSID
        help
          This option adds a `realm' match, which allows you to use the realm
          key from the routing subsystem inside iptables.
index 441050f..9ae6878 100644 (file)
@@ -1,6 +1,7 @@
 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
 
 nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -28,7 +29,9 @@ obj-$(CONFIG_NF_CONNTRACK_AMANDA) += nf_conntrack_amanda.o
 obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
 obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o
 obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o
+obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o
 obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o
+obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o
 obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o
 obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
@@ -45,6 +48,7 @@ obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
index 32fcbe2..1e00bf7 100644 (file)
@@ -175,13 +175,21 @@ next_hook:
                ret = 1;
        } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
                kfree_skb(skb);
-               ret = -(verdict >> NF_VERDICT_BITS);
+               ret = NF_DROP_GETERR(verdict);
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-                             verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
+                              verdict >> NF_VERDICT_QBITS);
+               if (ret < 0) {
+                       if (ret == -ECANCELED)
+                               goto next_hook;
+                       if (ret == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
+               ret = 0;
        }
        rcu_read_unlock();
        return ret;
@@ -214,7 +222,7 @@ EXPORT_SYMBOL(skb_make_writable);
 /* This does not belong here, but locally generated errors need it if connection
    tracking in use: without this, connection may not be in hash table, and hence
    manufactured ICMP or RST packets will not be associated with it. */
-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);
+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;
 EXPORT_SYMBOL(ip_ct_attach);
 
 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
@@ -231,7 +239,7 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(nf_ct_attach);
 
-void (*nf_ct_destroy)(struct nf_conntrack *);
+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 EXPORT_SYMBOL(nf_ct_destroy);
 
 void nf_conntrack_destroy(struct nf_conntrack *nfct)
index a475ede..5c48ffb 100644 (file)
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
 EXPORT_SYMBOL(unregister_ip_vs_app);
 EXPORT_SYMBOL(register_ip_vs_app_inc);
 
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
 /*
  *     Get an ip_vs_app object
  */
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
  *     Allocate/initialize app incarnation and register it in proto apps.
  */
 static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+                 __u16 port)
 {
        struct ip_vs_protocol *pp;
        struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
                }
        }
 
-       ret = pp->register_app(inc);
+       ret = pp->register_app(net, inc);
        if (ret)
                goto out;
 
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
  *     Release app incarnation
  */
 static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
 {
        struct ip_vs_protocol *pp;
 
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
                return;
 
        if (pp->unregister_app)
-               pp->unregister_app(inc);
+               pp->unregister_app(net, inc);
 
        IP_VS_DBG(9, "%s App %s:%u unregistered\n",
                  pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
  *     Register an application incarnation in protocol applications
  */
 int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+                      __u16 port)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        int result;
 
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
 
-       result = ip_vs_app_inc_new(app, proto, port);
+       result = ip_vs_app_inc_new(net, app, proto, port);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
 
        return result;
 }
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
 /*
  *     ip_vs_app registration routine
  */
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        /* increase the module use count */
        ip_vs_use_count_inc();
 
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
 
-       list_add(&app->a_list, &ip_vs_app_list);
+       list_add(&app->a_list, &ipvs->app_list);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
 
        return 0;
 }
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
  *     ip_vs_app unregistration routine
  *     We are sure there are no app incarnations attached to services
  */
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_app *inc, *nxt;
 
-       mutex_lock(&__ip_vs_app_mutex);
+       mutex_lock(&ipvs->app_mutex);
 
        list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
-               ip_vs_app_inc_release(inc);
+               ip_vs_app_inc_release(net, inc);
        }
 
        list_del(&app->a_list);
 
-       mutex_unlock(&__ip_vs_app_mutex);
+       mutex_unlock(&ipvs->app_mutex);
 
        /* decrease the module use count */
        ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
 /*
  *     Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
  */
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+                  struct ip_vs_protocol *pp)
 {
        return pp->app_conn_bind(cp);
 }
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
  *     /proc/net/ip_vs_app entry function
  */
 
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
 {
        struct ip_vs_app *app, *inc;
 
-       list_for_each_entry(app, &ip_vs_app_list, a_list) {
+       list_for_each_entry(app, &ipvs->app_list, a_list) {
                list_for_each_entry(inc, &app->incs_list, a_list) {
                        if (pos-- == 0)
                                return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
 
 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       mutex_lock(&__ip_vs_app_mutex);
+       struct net *net = seq_file_net(seq);
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+       mutex_lock(&ipvs->app_mutex);
+
+       return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
 }
 
 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct ip_vs_app *inc, *app;
        struct list_head *e;
+       struct net *net = seq_file_net(seq);
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        ++*pos;
        if (v == SEQ_START_TOKEN)
-               return ip_vs_app_idx(0);
+               return ip_vs_app_idx(ipvs, 0);
 
        inc = v;
        app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
                return list_entry(e, struct ip_vs_app, a_list);
 
        /* go on to next application */
-       for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+       for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
                app = list_entry(e, struct ip_vs_app, a_list);
                list_for_each_entry(inc, &app->incs_list, a_list) {
                        return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
 {
-       mutex_unlock(&__ip_vs_app_mutex);
+       struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+       mutex_unlock(&ipvs->app_mutex);
 }
 
 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
 
 static int ip_vs_app_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &ip_vs_app_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+                           sizeof(struct seq_net_private));
 }
 
 static const struct file_operations ip_vs_app_fops = {
@@ -569,15 +578,36 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif
 
-int __init ip_vs_app_init(void)
+static int __net_init __ip_vs_app_init(struct net *net)
 {
-       /* we will replace it with proc_net_ipvs_create() soon */
-       proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->app_list);
+       __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
+       proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
        return 0;
 }
 
+static void __net_exit __ip_vs_app_cleanup(struct net *net)
+{
+       proc_net_remove(net, "ip_vs_app");
+}
+
+static struct pernet_operations ip_vs_app_ops = {
+       .init = __ip_vs_app_init,
+       .exit = __ip_vs_app_cleanup,
+};
+
+int __init ip_vs_app_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_app_ops);
+       return rv;
+}
+
 
 void ip_vs_app_cleanup(void)
 {
-       proc_net_remove(&init_net, "ip_vs_app");
+       unregister_pernet_subsys(&ip_vs_app_ops);
 }
index e9adecd..83233fe 100644 (file)
 /*
  * Connection hash size. Default is what was selected at compile time.
 */
-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);
 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");
 
 /* size and mask values */
-int ip_vs_conn_tab_size;
-int ip_vs_conn_tab_mask;
+int ip_vs_conn_tab_size __read_mostly;
+static int ip_vs_conn_tab_mask __read_mostly;
 
 /*
  *  Connection hash table: for input and output packets lookups of IPVS
  */
-static struct list_head *ip_vs_conn_tab;
+static struct list_head *ip_vs_conn_tab __read_mostly;
 
 /*  SLAB cache for IPVS connections */
 static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
 
-/*  counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
 /*  counter for no client port connections */
 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
 
 /* random value for IPVS connection hash */
-static unsigned int ip_vs_conn_rnd;
+static unsigned int ip_vs_conn_rnd __read_mostly;
 
 /*
  *  Fine locking granularity for big connection hash table
  */
-#define CT_LOCKARRAY_BITS  4
+#define CT_LOCKARRAY_BITS  5
 #define CT_LOCKARRAY_SIZE  (1<<CT_LOCKARRAY_BITS)
 #define CT_LOCKARRAY_MASK  (CT_LOCKARRAY_SIZE-1)
 
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
 /*
  *     Returns hash value for IPVS connection entry
  */
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
                                       const union nf_inet_addr *addr,
                                       __be16 port)
 {
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6)
-               return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
-                                   (__force u32)port, proto, ip_vs_conn_rnd)
-                       & ip_vs_conn_tab_mask;
+               return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+                                   (__force u32)port, proto, ip_vs_conn_rnd) ^
+                       ((size_t)net>>8)) & ip_vs_conn_tab_mask;
 #endif
-       return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
-                           ip_vs_conn_rnd)
-               & ip_vs_conn_tab_mask;
+       return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+                           ip_vs_conn_rnd) ^
+               ((size_t)net>>8)) & ip_vs_conn_tab_mask;
 }
 
 static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,18 +163,18 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
                port = p->vport;
        }
 
-       return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+       return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
 }
 
 static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 {
        struct ip_vs_conn_param p;
 
-       ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
-                             NULL, 0, &p);
+       ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+                             &cp->caddr, cp->cport, NULL, 0, &p);
 
-       if (cp->dest && cp->dest->svc->pe) {
-               p.pe = cp->dest->svc->pe;
+       if (cp->pe) {
+               p.pe = cp->pe;
                p.pe_data = cp->pe_data;
                p.pe_data_len = cp->pe_data_len;
        }
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
 }
 
 /*
- *     Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ *     Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
  *     returns bool success.
  */
 static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                if (cp->af == p->af &&
+                   p->cport == cp->cport && p->vport == cp->vport &&
                    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
                    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
-                   p->cport == cp->cport && p->vport == cp->vport &&
                    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
-                   p->protocol == cp->protocol) {
+                   p->protocol == cp->protocol &&
+                   ip_vs_conn_net_eq(cp, p->net)) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        ct_read_unlock(hash);
@@ -313,23 +311,23 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
                            struct ip_vs_conn_param *p)
 {
        __be16 _ports[2], *pptr;
+       struct net *net = skb_net(skb);
 
        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
        if (pptr == NULL)
                return 1;
 
        if (likely(!inverse))
-               ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
-                                     &iph->daddr, pptr[1], p);
+               ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+                                     pptr[0], &iph->daddr, pptr[1], p);
        else
-               ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
-                                     &iph->saddr, pptr[0], p);
+               ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+                                     pptr[1], &iph->saddr, pptr[0], p);
        return 0;
 }
 
 struct ip_vs_conn *
 ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
-                       struct ip_vs_protocol *pp,
                        const struct ip_vs_iphdr *iph,
                        unsigned int proto_off, int inverse)
 {
@@ -353,8 +351,10 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
        ct_read_lock(hash);
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+               if (!ip_vs_conn_net_eq(cp, p->net))
+                       continue;
                if (p->pe_data && p->pe->ct_match) {
-                       if (p->pe->ct_match(p, cp))
+                       if (p->pe == cp->pe && p->pe->ct_match(p, cp))
                                goto out;
                        continue;
                }
@@ -404,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
        list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
                if (cp->af == p->af &&
+                   p->vport == cp->cport && p->cport == cp->dport &&
                    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
                    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
-                   p->vport == cp->cport && p->cport == cp->dport &&
-                   p->protocol == cp->protocol) {
+                   p->protocol == cp->protocol &&
+                   ip_vs_conn_net_eq(cp, p->net)) {
                        /* HIT */
                        atomic_inc(&cp->refcnt);
                        ret = cp;
@@ -428,7 +429,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 
 struct ip_vs_conn *
 ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
-                        struct ip_vs_protocol *pp,
                         const struct ip_vs_iphdr *iph,
                         unsigned int proto_off, int inverse)
 {
@@ -611,9 +611,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
        struct ip_vs_dest *dest;
 
        if ((cp) && (!cp->dest)) {
-               dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
-                                      &cp->vaddr, cp->vport,
-                                      cp->protocol);
+               dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+                                      cp->dport, &cp->vaddr, cp->vport,
+                                      cp->protocol, cp->fwmark);
                ip_vs_bind_dest(cp, dest);
                return dest;
        } else
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 int ip_vs_check_template(struct ip_vs_conn *ct)
 {
        struct ip_vs_dest *dest = ct->dest;
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
 
        /*
         * Checking the dest server status.
         */
        if ((dest == NULL) ||
            !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
-           (sysctl_ip_vs_expire_quiescent_template &&
+           (ipvs->sysctl_expire_quiescent_template &&
             (atomic_read(&dest->weight) == 0))) {
                IP_VS_DBG_BUF(9, "check_template: dest not available for "
                              "protocol %s s:%s:%d v:%s:%d "
@@ -730,6 +731,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 static void ip_vs_conn_expire(unsigned long data)
 {
        struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
 
        cp->timeout = 60*HZ;
 
@@ -765,13 +767,14 @@ static void ip_vs_conn_expire(unsigned long data)
                if (cp->flags & IP_VS_CONN_F_NFCT)
                        ip_vs_conn_drop_conntrack(cp);
 
+               ip_vs_pe_put(cp->pe);
                kfree(cp->pe_data);
                if (unlikely(cp->app != NULL))
                        ip_vs_unbind_app(cp);
                ip_vs_unbind_dest(cp);
                if (cp->flags & IP_VS_CONN_F_NO_CPORT)
                        atomic_dec(&ip_vs_conn_no_cport_cnt);
-               atomic_dec(&ip_vs_conn_count);
+               atomic_dec(&ipvs->conn_count);
 
                kmem_cache_free(ip_vs_conn_cachep, cp);
                return;
@@ -802,10 +805,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 struct ip_vs_conn *
 ip_vs_conn_new(const struct ip_vs_conn_param *p,
               const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
-              struct ip_vs_dest *dest)
+              struct ip_vs_dest *dest, __u32 fwmark)
 {
        struct ip_vs_conn *cp;
-       struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+       struct netns_ipvs *ipvs = net_ipvs(p->net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+                                                          p->protocol);
 
        cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
        if (cp == NULL) {
@@ -815,6 +820,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 
        INIT_LIST_HEAD(&cp->c_list);
        setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+       ip_vs_conn_net_set(cp, p->net);
        cp->af             = p->af;
        cp->protocol       = p->protocol;
        ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -826,7 +832,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
                        &cp->daddr, daddr);
        cp->dport          = dport;
        cp->flags          = flags;
-       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
+       cp->fwmark         = fwmark;
+       if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
+               ip_vs_pe_get(p->pe);
+               cp->pe = p->pe;
                cp->pe_data = p->pe_data;
                cp->pe_data_len = p->pe_data_len;
        }
@@ -842,7 +851,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
        atomic_set(&cp->n_control, 0);
        atomic_set(&cp->in_pkts, 0);
 
-       atomic_inc(&ip_vs_conn_count);
+       atomic_inc(&ipvs->conn_count);
        if (flags & IP_VS_CONN_F_NO_CPORT)
                atomic_inc(&ip_vs_conn_no_cport_cnt);
 
@@ -861,8 +870,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 #endif
                ip_vs_bind_xmit(cp);
 
-       if (unlikely(pp && atomic_read(&pp->appcnt)))
-               ip_vs_bind_app(cp, pp);
+       if (unlikely(pd && atomic_read(&pd->appcnt)))
+               ip_vs_bind_app(cp, pd->pp);
 
        /*
         * Allow conntrack to be preserved. By default, conntrack
@@ -871,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
         * IP_VS_CONN_F_ONE_PACKET too.
         */
 
-       if (ip_vs_conntrack_enabled())
+       if (ip_vs_conntrack_enabled(ipvs))
                cp->flags |= IP_VS_CONN_F_NFCT;
 
        /* Hash it in the ip_vs_conn_tab finally */
@@ -884,17 +893,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
  *     /proc/net/ip_vs_conn entries
  */
 #ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+       struct seq_net_private p;
+       struct list_head *l;
+};
 
 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 {
        int idx;
        struct ip_vs_conn *cp;
+       struct ip_vs_iter_state *iter = seq->private;
 
        for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
                ct_read_lock_bh(idx);
                list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
                        if (pos-- == 0) {
-                               seq->private = &ip_vs_conn_tab[idx];
+                               iter->l = &ip_vs_conn_tab[idx];
                        return cp;
                        }
                }
@@ -906,14 +920,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 
 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
 {
-       seq->private = NULL;
+       struct ip_vs_iter_state *iter = seq->private;
+
+       iter->l = NULL;
        return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
 
 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct ip_vs_conn *cp = v;
-       struct list_head *e, *l = seq->private;
+       struct ip_vs_iter_state *iter = seq->private;
+       struct list_head *e, *l = iter->l;
        int idx;
 
        ++*pos;
@@ -930,18 +947,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        while (++idx < ip_vs_conn_tab_size) {
                ct_read_lock_bh(idx);
                list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-                       seq->private = &ip_vs_conn_tab[idx];
+                       iter->l = &ip_vs_conn_tab[idx];
                        return cp;
                }
                ct_read_unlock_bh(idx);
        }
-       seq->private = NULL;
+       iter->l = NULL;
        return NULL;
 }
 
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
 {
-       struct list_head *l = seq->private;
+       struct ip_vs_iter_state *iter = seq->private;
+       struct list_head *l = iter->l;
 
        if (l)
                ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -955,18 +973,19 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Expires PEName PEData\n");
        else {
                const struct ip_vs_conn *cp = v;
+               struct net *net = seq_file_net(seq);
                char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
                size_t len = 0;
 
-               if (cp->dest && cp->pe_data &&
-                   cp->dest->svc->pe->show_pe_data) {
+               if (!ip_vs_conn_net_eq(cp, net))
+                       return 0;
+               if (cp->pe_data) {
                        pe_data[0] = ' ';
-                       len = strlen(cp->dest->svc->pe->name);
-                       memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
+                       len = strlen(cp->pe->name);
+                       memcpy(pe_data + 1, cp->pe->name, len);
                        pe_data[len + 1] = ' ';
                        len += 2;
-                       len += cp->dest->svc->pe->show_pe_data(cp,
-                                                              pe_data + len);
+                       len += cp->pe->show_pe_data(cp, pe_data + len);
                }
                pe_data[len] = '\0';
 
@@ -1004,7 +1023,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
 
 static int ip_vs_conn_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &ip_vs_conn_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+                           sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1051,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
    "Pro FromIP   FPrt ToIP     TPrt DestIP   DPrt State       Origin Expires\n");
        else {
                const struct ip_vs_conn *cp = v;
+               struct net *net = seq_file_net(seq);
+
+               if (!ip_vs_conn_net_eq(cp, net))
+                       return 0;
 
 #ifdef CONFIG_IP_VS_IPV6
                if (cp->af == AF_INET6)
@@ -1067,7 +1091,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
 
 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
 {
-       return seq_open(file, &ip_vs_conn_sync_seq_ops);
+       return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+                           sizeof(struct ip_vs_iter_state));
 }
 
 static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1113,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 }
 
 /* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
 {
        int idx;
        struct ip_vs_conn *cp;
@@ -1133,7 +1158,8 @@ void ip_vs_random_dropentry(void)
                        if (cp->flags & IP_VS_CONN_F_TEMPLATE)
                                /* connection template */
                                continue;
-
+                       if (!ip_vs_conn_net_eq(cp, net))
+                               continue;
                        if (cp->protocol == IPPROTO_TCP) {
                                switch(cp->state) {
                                case IP_VS_TCP_S_SYN_RECV:
@@ -1168,12 +1194,13 @@ void ip_vs_random_dropentry(void)
 /*
  *      Flush all the connection entries in the ip_vs_conn_tab
  */
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
 {
        int idx;
        struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-  flush_again:
+flush_again:
        for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
                /*
                 *  Lock is actually needed in this loop.
@@ -1181,7 +1208,8 @@ static void ip_vs_conn_flush(void)
                ct_write_lock_bh(idx);
 
                list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+                       if (!ip_vs_conn_net_eq(cp, net))
+                               continue;
                        IP_VS_DBG(4, "del connection\n");
                        ip_vs_conn_expire_now(cp);
                        if (cp->control) {
@@ -1194,16 +1222,41 @@ static void ip_vs_conn_flush(void)
 
        /* the counter may be not NULL, because maybe some conn entries
           are run by slow timer handler or unhashed but still referred */
-       if (atomic_read(&ip_vs_conn_count) != 0) {
+       if (atomic_read(&ipvs->conn_count) != 0) {
                schedule();
                goto flush_again;
        }
 }
+/*
+ * per netns init and exit
+ */
+int __net_init __ip_vs_conn_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       atomic_set(&ipvs->conn_count, 0);
+
+       proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+       proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+       return 0;
+}
 
+static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+{
+       /* flush all the connection entries first */
+       ip_vs_conn_flush(net);
+       proc_net_remove(net, "ip_vs_conn");
+       proc_net_remove(net, "ip_vs_conn_sync");
+}
+static struct pernet_operations ipvs_conn_ops = {
+       .init = __ip_vs_conn_init,
+       .exit = __ip_vs_conn_cleanup,
+};
 
 int __init ip_vs_conn_init(void)
 {
        int idx;
+       int retc;
 
        /* Compute size and mask */
        ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1241,24 +1294,18 @@ int __init ip_vs_conn_init(void)
                rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
        }
 
-       proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
-       proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
+       retc = register_pernet_subsys(&ipvs_conn_ops);
 
        /* calculate the random value for connection hash */
        get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
 
-       return 0;
+       return retc;
 }
 
-
 void ip_vs_conn_cleanup(void)
 {
-       /* flush all the connection entries first */
-       ip_vs_conn_flush();
-
+       unregister_pernet_subsys(&ipvs_conn_ops);
        /* Release the empty cache */
        kmem_cache_destroy(ip_vs_conn_cachep);
-       proc_net_remove(&init_net, "ip_vs_conn");
-       proc_net_remove(&init_net, "ip_vs_conn_sync");
        vfree(ip_vs_conn_tab);
 }
index b4e51e9..f36a84f 100644 (file)
@@ -41,6 +41,7 @@
 #include <net/icmp.h>                   /* for icmp_send */
 #include <net/route.h>
 #include <net/ip6_checksum.h>
+#include <net/netns/generic.h>         /* net_generic() */
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
@@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
+int ip_vs_net_id __read_mostly;
+#ifdef IP_VS_GENERIC_NETNS
+EXPORT_SYMBOL(ip_vs_net_id);
+#endif
+/* netns cnt used for uniqueness */
+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
 /* ID used in ICMP lookups */
 #define icmp_id(icmph)          (((icmph)->un).echo.id)
@@ -108,21 +115,28 @@ static inline void
 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
        struct ip_vs_dest *dest = cp->dest;
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               spin_lock(&dest->stats.lock);
-               dest->stats.ustats.inpkts++;
-               dest->stats.ustats.inbytes += skb->len;
-               spin_unlock(&dest->stats.lock);
-
-               spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.ustats.inpkts++;
-               dest->svc->stats.ustats.inbytes += skb->len;
-               spin_unlock(&dest->svc->stats.lock);
-
-               spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.ustats.inpkts++;
-               ip_vs_stats.ustats.inbytes += skb->len;
-               spin_unlock(&ip_vs_stats.lock);
+               struct ip_vs_cpu_stats *s;
+
+               s = this_cpu_ptr(dest->stats.cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(ipvs->cpustats);
+               s->ustats.inpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.inbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
        }
 }
 
@@ -131,21 +145,28 @@ static inline void
 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 {
        struct ip_vs_dest *dest = cp->dest;
+       struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+
        if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
-               spin_lock(&dest->stats.lock);
-               dest->stats.ustats.outpkts++;
-               dest->stats.ustats.outbytes += skb->len;
-               spin_unlock(&dest->stats.lock);
-
-               spin_lock(&dest->svc->stats.lock);
-               dest->svc->stats.ustats.outpkts++;
-               dest->svc->stats.ustats.outbytes += skb->len;
-               spin_unlock(&dest->svc->stats.lock);
-
-               spin_lock(&ip_vs_stats.lock);
-               ip_vs_stats.ustats.outpkts++;
-               ip_vs_stats.ustats.outbytes += skb->len;
-               spin_unlock(&ip_vs_stats.lock);
+               struct ip_vs_cpu_stats *s;
+
+               s = this_cpu_ptr(dest->stats.cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(dest->svc->stats.cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
+
+               s = this_cpu_ptr(ipvs->cpustats);
+               s->ustats.outpkts++;
+               u64_stats_update_begin(&s->syncp);
+               s->ustats.outbytes += skb->len;
+               u64_stats_update_end(&s->syncp);
        }
 }
 
@@ -153,41 +174,44 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 static inline void
 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 {
-       spin_lock(&cp->dest->stats.lock);
-       cp->dest->stats.ustats.conns++;
-       spin_unlock(&cp->dest->stats.lock);
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
+       struct ip_vs_cpu_stats *s;
+
+       s = this_cpu_ptr(cp->dest->stats.cpustats);
+       s->ustats.conns++;
 
-       spin_lock(&svc->stats.lock);
-       svc->stats.ustats.conns++;
-       spin_unlock(&svc->stats.lock);
+       s = this_cpu_ptr(svc->stats.cpustats);
+       s->ustats.conns++;
 
-       spin_lock(&ip_vs_stats.lock);
-       ip_vs_stats.ustats.conns++;
-       spin_unlock(&ip_vs_stats.lock);
+       s = this_cpu_ptr(ipvs->cpustats);
+       s->ustats.conns++;
 }
 
 
 static inline int
 ip_vs_set_state(struct ip_vs_conn *cp, int direction,
                const struct sk_buff *skb,
-               struct ip_vs_protocol *pp)
+               struct ip_vs_proto_data *pd)
 {
-       if (unlikely(!pp->state_transition))
+       if (unlikely(!pd->pp->state_transition))
                return 0;
-       return pp->state_transition(cp, direction, skb, pp);
+       return pd->pp->state_transition(cp, direction, skb, pd);
 }
 
-static inline void
+static inline int
 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
                              struct sk_buff *skb, int protocol,
                              const union nf_inet_addr *caddr, __be16 cport,
                              const union nf_inet_addr *vaddr, __be16 vport,
                              struct ip_vs_conn_param *p)
 {
-       ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+       ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+                             vport, p);
        p->pe = svc->pe;
        if (p->pe && p->pe->fill_param)
-               p->pe->fill_param(p, skb);
+               return p->pe->fill_param(p, skb);
+
+       return 0;
 }
 
 /*
@@ -200,7 +224,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 static struct ip_vs_conn *
 ip_vs_sched_persist(struct ip_vs_service *svc,
                    struct sk_buff *skb,
-                   __be16 ports[2])
+                   __be16 src_port, __be16 dst_port, int *ignored)
 {
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_iphdr iph;
@@ -224,8 +248,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 
        IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
                      "mnet %s\n",
-                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
-                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+                     IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+                     IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
                      IP_VS_DBG_ADDR(svc->af, &snet));
 
        /*
@@ -247,14 +271,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
                __be16 vport = 0;
 
-               if (ports[1] == svc->port) {
+               if (dst_port == svc->port) {
                        /* non-FTP template:
                         * <protocol, caddr, 0, vaddr, vport, daddr, dport>
                         * FTP template:
                         * <protocol, caddr, 0, vaddr, 0, daddr, 0>
                         */
                        if (svc->port != FTPPORT)
-                               vport = ports[1];
+                               vport = dst_port;
                } else {
                        /* Note: persistent fwmark-based services and
                         * persistent port zero service are handled here.
@@ -268,24 +292,31 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                                vaddr = &fwmark;
                        }
                }
-               ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
-                                             vaddr, vport, &param);
+               /* return *ignored = -1 so NF_DROP can be used */
+               if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+                                                 vaddr, vport, &param) < 0) {
+                       *ignored = -1;
+                       return NULL;
+               }
        }
 
        /* Check if a template already exists */
        ct = ip_vs_ct_in_get(&param);
        if (!ct || !ip_vs_check_template(ct)) {
-               /* No template found or the dest of the connection
+               /*
+                * No template found or the dest of the connection
                 * template is not available.
+                * return *ignored=0 i.e. ICMP and NF_DROP
                 */
                dest = svc->scheduler->schedule(svc, skb);
                if (!dest) {
                        IP_VS_DBG(1, "p-schedule: no dest found.\n");
                        kfree(param.pe_data);
+                       *ignored = 0;
                        return NULL;
                }
 
-               if (ports[1] == svc->port && svc->port != FTPPORT)
+               if (dst_port == svc->port && svc->port != FTPPORT)
                        dport = dest->port;
 
                /* Create a template
@@ -293,9 +324,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                 * and thus param.pe_data will be destroyed
                 * when the template expires */
                ct = ip_vs_conn_new(&param, &dest->addr, dport,
-                                   IP_VS_CONN_F_TEMPLATE, dest);
+                                   IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
                if (ct == NULL) {
                        kfree(param.pe_data);
+                       *ignored = -1;
                        return NULL;
                }
 
@@ -306,7 +338,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
                kfree(param.pe_data);
        }
 
-       dport = ports[1];
+       dport = dst_port;
        if (dport == svc->port && dest->port)
                dport = dest->port;
 
@@ -317,11 +349,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
        /*
         *    Create a new connection according to the template
         */
-       ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
-                             &iph.daddr, ports[1], &param);
-       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
+       ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+                             src_port, &iph.daddr, dst_port, &param);
+
+       cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
        if (cp == NULL) {
                ip_vs_conn_put(ct);
+               *ignored = -1;
                return NULL;
        }
 
@@ -341,11 +375,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
  *  It selects a server according to the virtual service, and
  *  creates a connection entry.
  *  Protocols supported: TCP, UDP
+ *
+ *  Usage of *ignored
+ *
+ * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
+ *       svc/scheduler decides that this packet should be accepted with
+ *       NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 :   scheduler can not find destination, so try bypass or
+ *       return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 :  scheduler tried to schedule but fatal error occurred, eg.
+ *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ *       failure such as missing Call-ID, ENOMEM on skb_linearize
+ *       or pe_data. In this case we should return NF_DROP without
+ *       any attempts to send ICMP with ip_vs_leave.
  */
 struct ip_vs_conn *
 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
-              struct ip_vs_protocol *pp, int *ignored)
+              struct ip_vs_proto_data *pd, int *ignored)
 {
+       struct ip_vs_protocol *pp = pd->pp;
        struct ip_vs_conn *cp = NULL;
        struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest;
@@ -371,12 +421,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        }
 
        /*
-        * Do not schedule replies from local real server. It is risky
-        * for fwmark services but mostly for persistent services.
+        *    Do not schedule replies from local real server.
         */
        if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-           (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
-           (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+           (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
                IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
                              "Not scheduling reply for existing connection");
                __ip_vs_conn_put(cp);
@@ -386,10 +434,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
        /*
         *    Persistent service
         */
-       if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
-               *ignored = 0;
-               return ip_vs_sched_persist(svc, skb, pptr);
-       }
+       if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+               return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+       *ignored = 0;
 
        /*
         *    Non-persistent service
@@ -402,8 +450,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
                return NULL;
        }
 
-       *ignored = 0;
-
        dest = svc->scheduler->schedule(svc, skb);
        if (dest == NULL) {
                IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -419,13 +465,17 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
         */
        {
                struct ip_vs_conn_param p;
-               ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
-                                     pptr[0], &iph.daddr, pptr[1], &p);
+
+               ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+                                     &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+                                     &p);
                cp = ip_vs_conn_new(&p, &dest->addr,
                                    dest->port ? dest->port : pptr[1],
-                                   flags, dest);
-               if (!cp)
+                                   flags, dest, skb->mark);
+               if (!cp) {
+                       *ignored = -1;
                        return NULL;
+               }
        }
 
        IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
@@ -447,11 +497,14 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
  *  no destination is available for a new connection.
  */
 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
-               struct ip_vs_protocol *pp)
+               struct ip_vs_proto_data *pd)
 {
+       struct net *net;
+       struct netns_ipvs *ipvs;
        __be16 _ports[2], *pptr;
        struct ip_vs_iphdr iph;
        int unicast;
+
        ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 
        pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -459,18 +512,20 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                ip_vs_service_put(svc);
                return NF_DROP;
        }
+       net = skb_net(skb);
 
 #ifdef CONFIG_IP_VS_IPV6
        if (svc->af == AF_INET6)
                unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
        else
 #endif
-               unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+               unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
 
        /* if it is fwmark-based service, the cache_bypass sysctl is up
           and the destination is a non-local unicast, then create
           a cache_bypass connection entry */
-       if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+       ipvs = net_ipvs(net);
+       if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
                int ret, cs;
                struct ip_vs_conn *cp;
                unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -484,12 +539,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
                {
                        struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(svc->af, iph.protocol,
+                       ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
                                              &iph.saddr, pptr[0],
                                              &iph.daddr, pptr[1], &p);
                        cp = ip_vs_conn_new(&p, &daddr, 0,
                                            IP_VS_CONN_F_BYPASS | flags,
-                                           NULL);
+                                           NULL, skb->mark);
                        if (!cp)
                                return NF_DROP;
                }
@@ -498,10 +553,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
                ip_vs_in_stats(cp, skb);
 
                /* set state */
-               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+               cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
 
                /* transmit the first SYN packet */
-               ret = cp->packet_xmit(skb, cp, pp);
+               ret = cp->packet_xmit(skb, cp, pd->pp);
                /* do not touch skb anymore */
 
                atomic_inc(&cp->in_pkts);
@@ -682,6 +737,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
                                struct ip_vs_protocol *pp,
                                unsigned int offset, unsigned int ihl)
 {
+       struct netns_ipvs *ipvs;
        unsigned int verdict = NF_DROP;
 
        if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -703,6 +759,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
        if (!skb_make_writable(skb, offset))
                goto out;
 
+       ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6)
                ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -712,11 +770,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
 
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
-               if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+               if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
                        goto out;
        } else
 #endif
-               if ((sysctl_ip_vs_snat_reroute ||
+               if ((ipvs->sysctl_snat_reroute ||
                     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
                    ip_route_me_harder(skb, RTN_LOCAL) != 0)
                        goto out;
@@ -808,7 +866,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
 
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
        if (!cp)
                return NF_ACCEPT;
 
@@ -885,7 +943,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 
        ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
        if (!cp)
                return NF_ACCEPT;
 
@@ -924,9 +982,12 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
  * Used for NAT and local client.
  */
 static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                struct ip_vs_conn *cp, int ihl)
 {
+       struct ip_vs_protocol *pp = pd->pp;
+       struct netns_ipvs *ipvs;
+
        IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
 
        if (!skb_make_writable(skb, ihl))
@@ -961,13 +1022,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
         * if it came from this machine itself.  So re-compute
         * the routing information.
         */
+       ipvs = net_ipvs(skb_net(skb));
+
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
-               if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+               if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
                        goto drop;
        } else
 #endif
-               if ((sysctl_ip_vs_snat_reroute ||
+               if ((ipvs->sysctl_snat_reroute ||
                     skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
                    ip_route_me_harder(skb, RTN_LOCAL) != 0)
                        goto drop;
@@ -975,7 +1038,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
        IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
 
        ip_vs_out_stats(cp, skb);
-       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+       ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
        skb->ipvs_property = 1;
        if (!(cp->flags & IP_VS_CONN_F_NFCT))
                ip_vs_notrack(skb);
@@ -999,9 +1062,12 @@ drop:
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+       struct net *net = NULL;
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs;
 
        EnterFunction(11);
 
@@ -1022,6 +1088,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
        if (unlikely(!skb_dst(skb)))
                return NF_ACCEPT;
 
+       net = skb_net(skb);
        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 #ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6) {
@@ -1045,9 +1112,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
                        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
                }
 
-       pp = ip_vs_proto_get(iph.protocol);
-       if (unlikely(!pp))
+       pd = ip_vs_proto_data_get(net, iph.protocol);
+       if (unlikely(!pd))
                return NF_ACCEPT;
+       pp = pd->pp;
 
        /* reassemble IP fragments */
 #ifdef CONFIG_IP_VS_IPV6
@@ -1073,11 +1141,12 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
        /*
         * Check if the packet belongs to an existing entry
         */
-       cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+       cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+       ipvs = net_ipvs(net);
 
        if (likely(cp))
-               return handle_response(af, skb, pp, cp, iph.len);
-       if (sysctl_ip_vs_nat_icmp_send &&
+               return handle_response(af, skb, pd, cp, iph.len);
+       if (ipvs->sysctl_nat_icmp_send &&
            (pp->protocol == IPPROTO_TCP ||
             pp->protocol == IPPROTO_UDP ||
             pp->protocol == IPPROTO_SCTP)) {
@@ -1087,7 +1156,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
                                          sizeof(_ports), _ports);
                if (pptr == NULL)
                        return NF_ACCEPT;       /* Not for me */
-               if (ip_vs_lookup_real_service(af, iph.protocol,
+               if (ip_vs_lookup_real_service(net, af, iph.protocol,
                                              &iph.saddr,
                                              pptr[0])) {
                        /*
@@ -1202,12 +1271,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 static int
 ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+       struct net *net = NULL;
        struct iphdr *iph;
        struct icmphdr  _icmph, *ic;
        struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
        struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        unsigned int offset, ihl, verdict;
        union nf_inet_addr snet;
 
@@ -1249,9 +1320,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
        if (cih == NULL)
                return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-       pp = ip_vs_proto_get(cih->protocol);
-       if (!pp)
+       net = skb_net(skb);
+       pd = ip_vs_proto_data_get(net, cih->protocol);
+       if (!pd)
                return NF_ACCEPT;
+       pp = pd->pp;
 
        /* Is the embedded protocol header present? */
        if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1265,10 +1338,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        ip_vs_fill_iphdr(AF_INET, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
        if (!cp) {
                /* The packet could also belong to a local client */
-               cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+               cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
                if (cp) {
                        snet.ip = iph->saddr;
                        return handle_response_icmp(AF_INET, skb, &snet,
@@ -1312,6 +1385,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 static int
 ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 {
+       struct net *net = NULL;
        struct ipv6hdr *iph;
        struct icmp6hdr _icmph, *ic;
        struct ipv6hdr  _ciph, *cih;    /* The ip header contained
@@ -1319,6 +1393,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
        struct ip_vs_iphdr ciph;
        struct ip_vs_conn *cp;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        unsigned int offset, verdict;
        union nf_inet_addr snet;
        struct rt6_info *rt;
@@ -1361,9 +1436,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
        if (cih == NULL)
                return NF_ACCEPT; /* The packet looks wrong, ignore */
 
-       pp = ip_vs_proto_get(cih->nexthdr);
-       if (!pp)
+       net = skb_net(skb);
+       pd = ip_vs_proto_data_get(net, cih->nexthdr);
+       if (!pd)
                return NF_ACCEPT;
+       pp = pd->pp;
 
        /* Is the embedded protocol header present? */
        /* TODO: we don't support fragmentation at the moment anyways */
@@ -1377,10 +1454,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
        /* The embedded headers contain source and dest in reverse order */
-       cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+       cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
        if (!cp) {
                /* The packet could also belong to a local client */
-               cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+               cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
                if (cp) {
                        ipv6_addr_copy(&snet.in6, &iph->saddr);
                        return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1423,10 +1500,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 static unsigned int
 ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 {
+       struct net *net;
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
        int ret, restart, pkts;
+       struct netns_ipvs *ipvs;
 
        /* Already marked as IPVS request or reply? */
        if (skb->ipvs_property)
@@ -1480,20 +1560,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
                        ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
                }
 
+       net = skb_net(skb);
        /* Protocol supported? */
-       pp = ip_vs_proto_get(iph.protocol);
-       if (unlikely(!pp))
+       pd = ip_vs_proto_data_get(net, iph.protocol);
+       if (unlikely(!pd))
                return NF_ACCEPT;
-
+       pp = pd->pp;
        /*
         * Check if the packet belongs to an existing connection entry
         */
-       cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+       cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
 
        if (unlikely(!cp)) {
                int v;
 
-               if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+               if (!pp->conn_schedule(af, skb, pd, &v, &cp))
                        return v;
        }
 
@@ -1505,12 +1586,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        }
 
        IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+       net = skb_net(skb);
+       ipvs = net_ipvs(net);
        /* Check the server status */
        if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                /* the destination server is not available */
 
-               if (sysctl_ip_vs_expire_nodest_conn) {
+               if (ipvs->sysctl_expire_nodest_conn) {
                        /* try to expire the connection immediately */
                        ip_vs_conn_expire_now(cp);
                }
@@ -1521,7 +1603,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        }
 
        ip_vs_in_stats(cp, skb);
-       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+       restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
        if (cp->packet_xmit)
                ret = cp->packet_xmit(skb, cp, pp);
                /* do not touch skb anymore */
@@ -1535,35 +1617,41 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
         *
         * Sync connection if it is about to close to
         * encorage the standby servers to update the connections timeout
+        *
+        * For ONE_PKT let ip_vs_sync_conn() do the filter work.
         */
-       pkts = atomic_add_return(1, &cp->in_pkts);
-       if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               pkts = ipvs->sysctl_sync_threshold[0];
+       else
+               pkts = atomic_add_return(1, &cp->in_pkts);
+
+       if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
            cp->protocol == IPPROTO_SCTP) {
                if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
-                       (pkts % sysctl_ip_vs_sync_threshold[1]
-                        == sysctl_ip_vs_sync_threshold[0])) ||
+                       (pkts % ipvs->sysctl_sync_threshold[1]
+                        == ipvs->sysctl_sync_threshold[0])) ||
                                (cp->old_state != cp->state &&
                                 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
                                  (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
                                  (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
-                       ip_vs_sync_conn(cp);
+                       ip_vs_sync_conn(net, cp);
                        goto out;
                }
        }
 
        /* Keep this block last: TCP and others with pp->num_states <= 1 */
-       else if (af == AF_INET &&
-           (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+       else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
            (((cp->protocol != IPPROTO_TCP ||
               cp->state == IP_VS_TCP_S_ESTABLISHED) &&
-             (pkts % sysctl_ip_vs_sync_threshold[1]
-              == sysctl_ip_vs_sync_threshold[0])) ||
+             (pkts % ipvs->sysctl_sync_threshold[1]
+              == ipvs->sysctl_sync_threshold[0])) ||
             ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
              ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
               (cp->state == IP_VS_TCP_S_CLOSE) ||
               (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
               (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
-               ip_vs_sync_conn(cp);
+               ip_vs_sync_conn(net, cp);
 out:
        cp->old_state = cp->state;
 
@@ -1782,7 +1870,41 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
        },
 #endif
 };
+/*
+ *     Initialize IP Virtual Server netns mem.
+ */
+static int __net_init __ip_vs_init(struct net *net)
+{
+       struct netns_ipvs *ipvs;
+
+       ipvs = net_generic(net, ip_vs_net_id);
+       if (ipvs == NULL) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       ipvs->net = net;
+       /* Counters used for creating unique names */
+       ipvs->gen = atomic_read(&ipvs_netns_cnt);
+       atomic_inc(&ipvs_netns_cnt);
+       net->ipvs = ipvs;
+       printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",
+                        sizeof(struct netns_ipvs), ipvs->gen);
+       return 0;
+}
+
+static void __net_exit __ip_vs_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
+       IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);
+}
+
+static struct pernet_operations ipvs_core_ops = {
+       .init = __ip_vs_init,
+       .exit = __ip_vs_cleanup,
+       .id   = &ip_vs_net_id,
+       .size = sizeof(struct netns_ipvs),
+};
 
 /*
  *     Initialize IP Virtual Server
@@ -1791,8 +1913,11 @@ static int __init ip_vs_init(void)
 {
        int ret;
 
-       ip_vs_estimator_init();
+       ret = register_pernet_subsys(&ipvs_core_ops);   /* Alloc ip_vs struct */
+       if (ret < 0)
+               return ret;
 
+       ip_vs_estimator_init();
        ret = ip_vs_control_init();
        if (ret < 0) {
                pr_err("can't setup control.\n");
@@ -1813,15 +1938,23 @@ static int __init ip_vs_init(void)
                goto cleanup_app;
        }
 
+       ret = ip_vs_sync_init();
+       if (ret < 0) {
+               pr_err("can't setup sync data.\n");
+               goto cleanup_conn;
+       }
+
        ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
        if (ret < 0) {
                pr_err("can't register hooks.\n");
-               goto cleanup_conn;
+               goto cleanup_sync;
        }
 
        pr_info("ipvs loaded.\n");
        return ret;
 
+cleanup_sync:
+       ip_vs_sync_cleanup();
   cleanup_conn:
        ip_vs_conn_cleanup();
   cleanup_app:
@@ -1831,17 +1964,20 @@ static int __init ip_vs_init(void)
        ip_vs_control_cleanup();
   cleanup_estimator:
        ip_vs_estimator_cleanup();
+       unregister_pernet_subsys(&ipvs_core_ops);       /* free ip_vs struct */
        return ret;
 }
 
 static void __exit ip_vs_cleanup(void)
 {
        nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+       ip_vs_sync_cleanup();
        ip_vs_conn_cleanup();
        ip_vs_app_cleanup();
        ip_vs_protocol_cleanup();
        ip_vs_control_cleanup();
        ip_vs_estimator_cleanup();
+       unregister_pernet_subsys(&ipvs_core_ops);       /* free ip_vs struct */
        pr_info("ipvs unloaded.\n");
 }
 
index 22f7ad5..09ca2ce 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/mutex.h>
 
 #include <net/net_namespace.h>
+#include <linux/nsproxy.h>
 #include <net/ip.h>
 #ifdef CONFIG_IP_VS_IPV6
 #include <net/ipv6.h>
@@ -57,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
 /* lock for service table */
 static DEFINE_RWLOCK(__ip_vs_svc_lock);
 
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
 /* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-
 
 #ifdef CONFIG_IP_VS_DEBUG
 static int sysctl_ip_vs_debug_level = 0;
@@ -105,7 +71,8 @@ int ip_vs_get_debug_level(void)
 
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+                                   const struct in6_addr *addr)
 {
        struct rt6_info *rt;
        struct flowi fl = {
@@ -114,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
                .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
        };
 
-       rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+       rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
        if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
                        return 1;
 
@@ -125,7 +92,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
  *     update_defense_level is called from keventd and from sysctl,
  *     so it needs to protect itself from softirqs
  */
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
 {
        struct sysinfo i;
        static int old_secure_tcp = 0;
@@ -141,73 +108,73 @@ static void update_defense_level(void)
        /* si_swapinfo(&i); */
        /* availmem = availmem - (i.totalswap - i.freeswap); */
 
-       nomem = (availmem < sysctl_ip_vs_amemthresh);
+       nomem = (availmem < ipvs->sysctl_amemthresh);
 
        local_bh_disable();
 
        /* drop_entry */
-       spin_lock(&__ip_vs_dropentry_lock);
-       switch (sysctl_ip_vs_drop_entry) {
+       spin_lock(&ipvs->dropentry_lock);
+       switch (ipvs->sysctl_drop_entry) {
        case 0:
-               atomic_set(&ip_vs_dropentry, 0);
+               atomic_set(&ipvs->dropentry, 0);
                break;
        case 1:
                if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
-                       sysctl_ip_vs_drop_entry = 2;
+                       atomic_set(&ipvs->dropentry, 1);
+                       ipvs->sysctl_drop_entry = 2;
                } else {
-                       atomic_set(&ip_vs_dropentry, 0);
+                       atomic_set(&ipvs->dropentry, 0);
                }
                break;
        case 2:
                if (nomem) {
-                       atomic_set(&ip_vs_dropentry, 1);
+                       atomic_set(&ipvs->dropentry, 1);
                } else {
-                       atomic_set(&ip_vs_dropentry, 0);
-                       sysctl_ip_vs_drop_entry = 1;
+                       atomic_set(&ipvs->dropentry, 0);
+                       ipvs->sysctl_drop_entry = 1;
                };
                break;
        case 3:
-               atomic_set(&ip_vs_dropentry, 1);
+               atomic_set(&ipvs->dropentry, 1);
                break;
        }
-       spin_unlock(&__ip_vs_dropentry_lock);
+       spin_unlock(&ipvs->dropentry_lock);
 
        /* drop_packet */
-       spin_lock(&__ip_vs_droppacket_lock);
-       switch (sysctl_ip_vs_drop_packet) {
+       spin_lock(&ipvs->droppacket_lock);
+       switch (ipvs->sysctl_drop_packet) {
        case 0:
-               ip_vs_drop_rate = 0;
+               ipvs->drop_rate = 0;
                break;
        case 1:
                if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
-                       sysctl_ip_vs_drop_packet = 2;
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
+                       ipvs->sysctl_drop_packet = 2;
                } else {
-                       ip_vs_drop_rate = 0;
+                       ipvs->drop_rate = 0;
                }
                break;
        case 2:
                if (nomem) {
-                       ip_vs_drop_rate = ip_vs_drop_counter
-                               = sysctl_ip_vs_amemthresh /
-                               (sysctl_ip_vs_amemthresh-availmem);
+                       ipvs->drop_rate = ipvs->drop_counter
+                               = ipvs->sysctl_amemthresh /
+                               (ipvs->sysctl_amemthresh-availmem);
                } else {
-                       ip_vs_drop_rate = 0;
-                       sysctl_ip_vs_drop_packet = 1;
+                       ipvs->drop_rate = 0;
+                       ipvs->sysctl_drop_packet = 1;
                }
                break;
        case 3:
-               ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+               ipvs->drop_rate = ipvs->sysctl_am_droprate;
                break;
        }
-       spin_unlock(&__ip_vs_droppacket_lock);
+       spin_unlock(&ipvs->droppacket_lock);
 
        /* secure_tcp */
-       spin_lock(&ip_vs_securetcp_lock);
-       switch (sysctl_ip_vs_secure_tcp) {
+       spin_lock(&ipvs->securetcp_lock);
+       switch (ipvs->sysctl_secure_tcp) {
        case 0:
                if (old_secure_tcp >= 2)
                        to_change = 0;
@@ -216,7 +183,7 @@ static void update_defense_level(void)
                if (nomem) {
                        if (old_secure_tcp < 2)
                                to_change = 1;
-                       sysctl_ip_vs_secure_tcp = 2;
+                       ipvs->sysctl_secure_tcp = 2;
                } else {
                        if (old_secure_tcp >= 2)
                                to_change = 0;
@@ -229,7 +196,7 @@ static void update_defense_level(void)
                } else {
                        if (old_secure_tcp >= 2)
                                to_change = 0;
-                       sysctl_ip_vs_secure_tcp = 1;
+                       ipvs->sysctl_secure_tcp = 1;
                }
                break;
        case 3:
@@ -237,10 +204,11 @@ static void update_defense_level(void)
                        to_change = 1;
                break;
        }
-       old_secure_tcp = sysctl_ip_vs_secure_tcp;
+       old_secure_tcp = ipvs->sysctl_secure_tcp;
        if (to_change >= 0)
-               ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
-       spin_unlock(&ip_vs_securetcp_lock);
+               ip_vs_protocol_timeout_change(ipvs,
+                                             ipvs->sysctl_secure_tcp > 1);
+       spin_unlock(&ipvs->securetcp_lock);
 
        local_bh_enable();
 }
@@ -250,16 +218,16 @@ static void update_defense_level(void)
  *     Timer for checking the defense
  */
 #define DEFENSE_TIMER_PERIOD   1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
 
 static void defense_work_handler(struct work_struct *work)
 {
-       update_defense_level();
-       if (atomic_read(&ip_vs_dropentry))
-               ip_vs_random_dropentry();
+       struct netns_ipvs *ipvs =
+               container_of(work, struct netns_ipvs, defense_work.work);
 
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+       update_defense_level(ipvs);
+       if (atomic_read(&ipvs->dropentry))
+               ip_vs_random_dropentry(ipvs->net);
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
 }
 
 int
@@ -287,33 +255,13 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
 /* the service table hashed by fwmark */
 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
 
-/*
- *     Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
- *     Trash for destinations
- */
-static LIST_HEAD(ip_vs_dest_trash);
-
-/*
- *     FTP & NULL virtual service counters
- */
-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
-
 
 /*
  *     Returns hash value for virtual service
  */
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
-                 __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+                 const union nf_inet_addr *addr, __be16 port)
 {
        register unsigned porth = ntohs(port);
        __be32 addr_fold = addr->ip;
@@ -323,6 +271,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
 #endif
+       addr_fold ^= ((size_t)net>>8);
 
        return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
                & IP_VS_SVC_TAB_MASK;
@@ -331,13 +280,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
 /*
  *     Returns hash value of fwmark for virtual service lookup
  */
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
 {
-       return fwmark & IP_VS_SVC_TAB_MASK;
+       return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
 }
 
 /*
- *     Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ *     Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
  *     or in the ip_vs_svc_fwm_table by fwmark.
  *     Should be called with locked tables.
  */
@@ -353,16 +302,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
        if (svc->fwmark == 0) {
                /*
-                *  Hash it by <protocol,addr,port> in ip_vs_svc_table
+                *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
                 */
-               hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
-                                        svc->port);
+               hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+                                        &svc->addr, svc->port);
                list_add(&svc->s_list, &ip_vs_svc_table[hash]);
        } else {
                /*
-                *  Hash it by fwmark in ip_vs_svc_fwm_table
+                *  Hash it by fwmark in svc_fwm_table
                 */
-               hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+               hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
                list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
        }
 
@@ -374,7 +323,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
 
 
 /*
- *     Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ *     Unhashes a service from svc_table / svc_fwm_table.
  *     Should be called with locked tables.
  */
 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +335,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
        }
 
        if (svc->fwmark == 0) {
-               /* Remove it from the ip_vs_svc_table table */
+               /* Remove it from the svc_table table */
                list_del(&svc->s_list);
        } else {
-               /* Remove it from the ip_vs_svc_fwm_table table */
+               /* Remove it from the svc_fwm_table table */
                list_del(&svc->f_list);
        }
 
@@ -400,23 +349,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 
 
 /*
- *     Get service by {proto,addr,port} in the service table.
+ *     Get service by {netns, proto,addr,port} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
-                   __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+                    const union nf_inet_addr *vaddr, __be16 vport)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for "full" addressed entries */
-       hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+       hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
 
        list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
                if ((svc->af == af)
                    && ip_vs_addr_equal(af, &svc->addr, vaddr)
                    && (svc->port == vport)
-                   && (svc->protocol == protocol)) {
+                   && (svc->protocol == protocol)
+                   && net_eq(svc->net, net)) {
                        /* HIT */
                        return svc;
                }
@@ -430,16 +380,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
  *     Get service by {fwmark} in the service table.
  */
 static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
 {
        unsigned hash;
        struct ip_vs_service *svc;
 
        /* Check for fwmark addressed entries */
-       hash = ip_vs_svc_fwm_hashkey(fwmark);
+       hash = ip_vs_svc_fwm_hashkey(net, fwmark);
 
        list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
-               if (svc->fwmark == fwmark && svc->af == af) {
+               if (svc->fwmark == fwmark && svc->af == af
+                   && net_eq(svc->net, net)) {
                        /* HIT */
                        return svc;
                }
@@ -449,42 +400,44 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
 }
 
 struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
                  const union nf_inet_addr *vaddr, __be16 vport)
 {
        struct ip_vs_service *svc;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        read_lock(&__ip_vs_svc_lock);
 
        /*
         *      Check the table hashed by fwmark first
         */
-       if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+       svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+       if (fwmark && svc)
                goto out;
 
        /*
         *      Check the table hashed by <protocol,addr,port>
         *      for "full" addressed entries
         */
-       svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+       svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
 
        if (svc == NULL
            && protocol == IPPROTO_TCP
-           && atomic_read(&ip_vs_ftpsvc_counter)
+           && atomic_read(&ipvs->ftpsvc_counter)
            && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
                /*
                 * Check if ftp service entry exists, the packet
                 * might belong to FTP data connections.
                 */
-               svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
        }
 
        if (svc == NULL
-           && atomic_read(&ip_vs_nullsvc_counter)) {
+           && atomic_read(&ipvs->nullsvc_counter)) {
                /*
                 * Check if the catch-all port (port zero) exists
                 */
-               svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+               svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
        }
 
   out:
@@ -519,6 +472,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
                              svc->fwmark,
                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
                              ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
 }
@@ -545,10 +499,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
 }
 
 /*
- *     Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ *     Hashes ip_vs_dest in rs_table by <proto,addr,port>.
  *     should be called with locked tables.
  */
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
 {
        unsigned hash;
 
@@ -562,19 +516,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
         */
        hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
 
-       list_add(&dest->d_list, &ip_vs_rtable[hash]);
+       list_add(&dest->d_list, &ipvs->rs_table[hash]);
 
        return 1;
 }
 
 /*
- *     UNhashes ip_vs_dest from ip_vs_rtable.
+ *     UNhashes ip_vs_dest from rs_table.
  *     should be called with locked tables.
  */
 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
 {
        /*
-        * Remove it from the ip_vs_rtable table.
+        * Remove it from the rs_table table.
         */
        if (!list_empty(&dest->d_list)) {
                list_del(&dest->d_list);
@@ -588,10 +542,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
  *     Lookup real service by <proto,addr,port> in the real service table.
  */
 struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
                          const union nf_inet_addr *daddr,
                          __be16 dport)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        unsigned hash;
        struct ip_vs_dest *dest;
 
@@ -601,19 +556,19 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
         */
        hash = ip_vs_rs_hashkey(af, daddr, dport);
 
-       read_lock(&__ip_vs_rs_lock);
-       list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+       read_lock(&ipvs->rs_lock);
+       list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
                if ((dest->af == af)
                    && ip_vs_addr_equal(af, &dest->addr, daddr)
                    && (dest->port == dport)
                    && ((dest->protocol == protocol) ||
                        dest->vfwmark)) {
                        /* HIT */
-                       read_unlock(&__ip_vs_rs_lock);
+                       read_unlock(&ipvs->rs_lock);
                        return dest;
                }
        }
-       read_unlock(&__ip_vs_rs_lock);
+       read_unlock(&ipvs->rs_lock);
 
        return NULL;
 }
@@ -652,15 +607,16 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  * ip_vs_lookup_real_service() looked promissing, but
  * seems not working as expected.
  */
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
+                                  const union nf_inet_addr *daddr,
                                   __be16 dport,
                                   const union nf_inet_addr *vaddr,
-                                  __be16 vport, __u16 protocol)
+                                  __be16 vport, __u16 protocol, __u32 fwmark)
 {
        struct ip_vs_dest *dest;
        struct ip_vs_service *svc;
 
-       svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+       svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
        if (!svc)
                return NULL;
        dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -685,11 +641,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                     __be16 dport)
 {
        struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        /*
         * Find the destination in trash
         */
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
                              "dest->refcnt=%d\n",
                              dest->vfwmark,
@@ -720,6 +677,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
                        list_del(&dest->n_list);
                        ip_vs_dst_reset(dest);
                        __ip_vs_unbind_svc(dest);
+                       free_percpu(dest->stats.cpustats);
                        kfree(dest);
                }
        }
@@ -737,14 +695,16 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
  *  are expired, and the refcnt of each destination in the trash must
  *  be 1, so we simply release them here.
  */
-static void ip_vs_trash_cleanup(void)
+static void ip_vs_trash_cleanup(struct net *net)
 {
        struct ip_vs_dest *dest, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
+       list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
                list_del(&dest->n_list);
                ip_vs_dst_reset(dest);
                __ip_vs_unbind_svc(dest);
+               free_percpu(dest->stats.cpustats);
                kfree(dest);
        }
 }
@@ -768,6 +728,7 @@ static void
 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                    struct ip_vs_dest_user_kern *udest, int add)
 {
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
        int conn_flags;
 
        /* set the weight and the flags */
@@ -780,12 +741,12 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
                conn_flags |= IP_VS_CONN_F_NOOUTPUT;
        } else {
                /*
-                *    Put the real service in ip_vs_rtable if not present.
+                *    Put the real service in rs_table if not present.
                 *    For now only for NAT!
                 */
-               write_lock_bh(&__ip_vs_rs_lock);
-               ip_vs_rs_hash(dest);
-               write_unlock_bh(&__ip_vs_rs_lock);
+               write_lock_bh(&ipvs->rs_lock);
+               ip_vs_rs_hash(ipvs, dest);
+               write_unlock_bh(&ipvs->rs_lock);
        }
        atomic_set(&dest->conn_flags, conn_flags);
 
@@ -813,7 +774,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
        spin_unlock(&dest->dst_lock);
 
        if (add)
-               ip_vs_new_estimator(&dest->stats);
+               ip_vs_new_estimator(svc->net, &dest->stats);
 
        write_lock_bh(&__ip_vs_svc_lock);
 
@@ -850,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                atype = ipv6_addr_type(&udest->addr.in6);
                if ((!(atype & IPV6_ADDR_UNICAST) ||
                        atype & IPV6_ADDR_LINKLOCAL) &&
-                       !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+                       !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
                        return -EINVAL;
        } else
 #endif
        {
-               atype = inet_addr_type(&init_net, udest->addr.ip);
+               atype = inet_addr_type(svc->net, udest->addr.ip);
                if (atype != RTN_LOCAL && atype != RTN_UNICAST)
                        return -EINVAL;
        }
@@ -865,6 +826,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
                pr_err("%s(): no memory.\n", __func__);
                return -ENOMEM;
        }
+       dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!dest->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto err_alloc;
+       }
 
        dest->af = svc->af;
        dest->protocol = svc->protocol;
@@ -888,6 +854,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
 
        LeaveFunction(2);
        return 0;
+
+err_alloc:
+       kfree(dest);
+       return -ENOMEM;
 }
 
 
@@ -1006,16 +976,18 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
 /*
  *     Delete a destination (must be already unlinked from the service)
  */
-static void __ip_vs_del_dest(struct ip_vs_dest *dest)
+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
 {
-       ip_vs_kill_estimator(&dest->stats);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_kill_estimator(net, &dest->stats);
 
        /*
         *  Remove it from the d-linked list with the real services.
         */
-       write_lock_bh(&__ip_vs_rs_lock);
+       write_lock_bh(&ipvs->rs_lock);
        ip_vs_rs_unhash(dest);
-       write_unlock_bh(&__ip_vs_rs_lock);
+       write_unlock_bh(&ipvs->rs_lock);
 
        /*
         *  Decrease the refcnt of the dest, and free the dest
@@ -1034,6 +1006,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                   and only one user context can update virtual service at a
                   time, so the operation here is OK */
                atomic_dec(&dest->svc->refcnt);
+               free_percpu(dest->stats.cpustats);
                kfree(dest);
        } else {
                IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
@@ -1041,7 +1014,7 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
                              IP_VS_DBG_ADDR(dest->af, &dest->addr),
                              ntohs(dest->port),
                              atomic_read(&dest->refcnt));
-               list_add(&dest->n_list, &ip_vs_dest_trash);
+               list_add(&dest->n_list, &ipvs->dest_trash);
                atomic_inc(&dest->refcnt);
        }
 }
@@ -1105,7 +1078,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
        /*
         *      Delete the destination
         */
-       __ip_vs_del_dest(dest);
+       __ip_vs_del_dest(svc->net, dest);
 
        LeaveFunction(2);
 
@@ -1117,13 +1090,14 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
  *     Add a service into the service hash table
  */
 static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
                  struct ip_vs_service **svc_p)
 {
        int ret = 0;
        struct ip_vs_scheduler *sched = NULL;
        struct ip_vs_pe *pe = NULL;
        struct ip_vs_service *svc = NULL;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
        /* increase the module use count */
        ip_vs_use_count_inc();
@@ -1137,7 +1111,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        }
 
        if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
@@ -1159,6 +1133,11 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                ret = -ENOMEM;
                goto out_err;
        }
+       svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!svc->stats.cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto out_err;
+       }
 
        /* I'm the first user of the service */
        atomic_set(&svc->usecnt, 0);
@@ -1172,6 +1151,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        svc->flags = u->flags;
        svc->timeout = u->timeout * HZ;
        svc->netmask = u->netmask;
+       svc->net = net;
 
        INIT_LIST_HEAD(&svc->destinations);
        rwlock_init(&svc->sched_lock);
@@ -1189,15 +1169,15 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
 
        /* Update the virtual service counters */
        if (svc->port == FTPPORT)
-               atomic_inc(&ip_vs_ftpsvc_counter);
+               atomic_inc(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
-               atomic_inc(&ip_vs_nullsvc_counter);
+               atomic_inc(&ipvs->nullsvc_counter);
 
-       ip_vs_new_estimator(&svc->stats);
+       ip_vs_new_estimator(net, &svc->stats);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
-               ip_vs_num_services++;
+               ipvs->num_services++;
 
        /* Hash the service into the service table */
        write_lock_bh(&__ip_vs_svc_lock);
@@ -1207,6 +1187,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
        *svc_p = svc;
        return 0;
 
+
  out_err:
        if (svc != NULL) {
                ip_vs_unbind_scheduler(svc);
@@ -1215,6 +1196,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
                        ip_vs_app_inc_put(svc->inc);
                        local_bh_enable();
                }
+               if (svc->stats.cpustats)
+                       free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
        ip_vs_scheduler_put(sched);
@@ -1248,7 +1231,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
        old_sched = sched;
 
        if (u->pe_name && *u->pe_name) {
-               pe = ip_vs_pe_get(u->pe_name);
+               pe = ip_vs_pe_getbyname(u->pe_name);
                if (pe == NULL) {
                        pr_info("persistence engine module ip_vs_pe_%s "
                                "not found\n", u->pe_name);
@@ -1334,14 +1317,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
        struct ip_vs_dest *dest, *nxt;
        struct ip_vs_scheduler *old_sched;
        struct ip_vs_pe *old_pe;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        pr_info("%s: enter\n", __func__);
 
        /* Count only IPv4 services for old get/setsockopt interface */
        if (svc->af == AF_INET)
-               ip_vs_num_services--;
+               ipvs->num_services--;
 
-       ip_vs_kill_estimator(&svc->stats);
+       ip_vs_kill_estimator(svc->net, &svc->stats);
 
        /* Unbind scheduler */
        old_sched = svc->scheduler;
@@ -1364,16 +1348,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
         */
        list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
                __ip_vs_unlink_dest(svc, dest, 0);
-               __ip_vs_del_dest(dest);
+               __ip_vs_del_dest(svc->net, dest);
        }
 
        /*
         *    Update the virtual service counters
         */
        if (svc->port == FTPPORT)
-               atomic_dec(&ip_vs_ftpsvc_counter);
+               atomic_dec(&ipvs->ftpsvc_counter);
        else if (svc->port == 0)
-               atomic_dec(&ip_vs_nullsvc_counter);
+               atomic_dec(&ipvs->nullsvc_counter);
 
        /*
         *    Free the service if nobody refers to it
@@ -1383,6 +1367,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
                              svc->fwmark,
                              IP_VS_DBG_ADDR(svc->af, &svc->addr),
                              ntohs(svc->port), atomic_read(&svc->usecnt));
+               free_percpu(svc->stats.cpustats);
                kfree(svc);
        }
 
@@ -1428,17 +1413,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
 /*
  *     Flush all the virtual services
  */
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
 {
        int idx;
        struct ip_vs_service *svc, *nxt;
 
        /*
-        * Flush the service table hashed by <protocol,addr,port>
+        * Flush the service table hashed by <netns,protocol,addr,port>
         */
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
-               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_unlink_service(svc);
+               list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+                                        s_list) {
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                }
        }
 
@@ -1448,7 +1435,8 @@ static int ip_vs_flush(void)
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry_safe(svc, nxt,
                                         &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_unlink_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_unlink_service(svc);
                }
        }
 
@@ -1472,24 +1460,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
        return 0;
 }
 
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
 {
        int idx;
        struct ip_vs_service *svc;
 
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                }
        }
 
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       ip_vs_zero_service(svc);
+                       if (net_eq(svc->net, net))
+                               ip_vs_zero_service(svc);
                }
        }
 
-       ip_vs_zero_stats(&ip_vs_stats);
+       ip_vs_zero_stats(net_ipvs(net)->tot_stats);
        return 0;
 }
 
@@ -1498,6 +1488,7 @@ static int
 proc_do_defense_mode(ctl_table *table, int write,
                     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+       struct net *net = current->nsproxy->net_ns;
        int *valp = table->data;
        int val = *valp;
        int rc;
@@ -1508,7 +1499,7 @@ proc_do_defense_mode(ctl_table *table, int write,
                        /* Restore the correct value */
                        *valp = val;
                } else {
-                       update_defense_level();
+                       update_defense_level(net_ipvs(net));
                }
        }
        return rc;
@@ -1534,45 +1525,54 @@ proc_do_sync_threshold(ctl_table *table, int write,
        return rc;
 }
 
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+                    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int *valp = table->data;
+       int val = *valp;
+       int rc;
+
+       rc = proc_dointvec(table, write, buffer, lenp, ppos);
+       if (write && (*valp != val)) {
+               if ((*valp < 0) || (*valp > 1)) {
+                       /* Restore the correct value */
+                       *valp = val;
+               } else {
+                       struct net *net = current->nsproxy->net_ns;
+                       ip_vs_sync_switch_mode(net, val);
+               }
+       }
+       return rc;
+}
 
 /*
  *     IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ *     Do not change order or insert new entries without
+ *     align with netns init in __ip_vs_control_init()
  */
 
 static struct ctl_table vs_vars[] = {
        {
                .procname       = "amemthresh",
-               .data           = &sysctl_ip_vs_amemthresh,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-#ifdef CONFIG_IP_VS_DEBUG
-       {
-               .procname       = "debug_level",
-               .data           = &sysctl_ip_vs_debug_level,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-#endif
        {
                .procname       = "am_droprate",
-               .data           = &sysctl_ip_vs_am_droprate,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "drop_entry",
-               .data           = &sysctl_ip_vs_drop_entry,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "drop_packet",
-               .data           = &sysctl_ip_vs_drop_packet,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
@@ -1580,7 +1580,6 @@ static struct ctl_table vs_vars[] = {
 #ifdef CONFIG_IP_VS_NFCT
        {
                .procname       = "conntrack",
-               .data           = &sysctl_ip_vs_conntrack,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
@@ -1588,18 +1587,62 @@ static struct ctl_table vs_vars[] = {
 #endif
        {
                .procname       = "secure_tcp",
-               .data           = &sysctl_ip_vs_secure_tcp,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_defense_mode,
        },
        {
                .procname       = "snat_reroute",
-               .data           = &sysctl_ip_vs_snat_reroute,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .procname       = "sync_version",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_do_sync_mode,
+       },
+       {
+               .procname       = "cache_bypass",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_nodest_conn",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "expire_quiescent_template",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {
+               .procname       = "sync_threshold",
+               .maxlen         =
+                       sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+               .mode           = 0644,
+               .proc_handler   = proc_do_sync_threshold,
+       },
+       {
+               .procname       = "nat_icmp_send",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#ifdef CONFIG_IP_VS_DEBUG
+       {
+               .procname       = "debug_level",
+               .data           = &sysctl_ip_vs_debug_level,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#endif
 #if 0
        {
                .procname       = "timeout_established",
@@ -1686,41 +1729,6 @@ static struct ctl_table vs_vars[] = {
                .proc_handler   = proc_dointvec_jiffies,
        },
 #endif
-       {
-               .procname       = "cache_bypass",
-               .data           = &sysctl_ip_vs_cache_bypass,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "expire_nodest_conn",
-               .data           = &sysctl_ip_vs_expire_nodest_conn,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "expire_quiescent_template",
-               .data           = &sysctl_ip_vs_expire_quiescent_template,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
-       {
-               .procname       = "sync_threshold",
-               .data           = &sysctl_ip_vs_sync_threshold,
-               .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
-               .mode           = 0644,
-               .proc_handler   = proc_do_sync_threshold,
-       },
-       {
-               .procname       = "nat_icmp_send",
-               .data           = &sysctl_ip_vs_nat_icmp_send,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
        { }
 };
 
@@ -1732,11 +1740,10 @@ const struct ctl_path net_vs_ctl_path[] = {
 };
 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
 
-static struct ctl_table_header * sysctl_header;
-
 #ifdef CONFIG_PROC_FS
 
 struct ip_vs_iter {
+       struct seq_net_private p;  /* Do not move this, netns depends upon it*/
        struct list_head *table;
        int bucket;
 };
@@ -1763,6 +1770,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
 /* Get the Nth entry in the two lists */
 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
 {
+       struct net *net = seq_file_net(seq);
        struct ip_vs_iter *iter = seq->private;
        int idx;
        struct ip_vs_service *svc;
@@ -1770,7 +1778,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
        /* look in hash by protocol */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
-                       if (pos-- == 0){
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                iter->table = ip_vs_svc_table;
                                iter->bucket = idx;
                                return svc;
@@ -1781,7 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
        /* keep looking in fwmark */
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
-                       if (pos-- == 0) {
+                       if (net_eq(svc->net, net) && pos-- == 0) {
                                iter->table = ip_vs_svc_fwm_table;
                                iter->bucket = idx;
                                return svc;
@@ -1935,7 +1943,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
 
 static int ip_vs_info_open(struct inode *inode, struct file *file)
 {
-       return seq_open_private(file, &ip_vs_info_seq_ops,
+       return seq_open_net(inode, file, &ip_vs_info_seq_ops,
                        sizeof(struct ip_vs_iter));
 }
 
@@ -1949,13 +1957,11 @@ static const struct file_operations ip_vs_info_fops = {
 
 #endif
 
-struct ip_vs_stats ip_vs_stats = {
-       .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
-};
-
 #ifdef CONFIG_PROC_FS
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
@@ -1963,29 +1969,29 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
        seq_printf(seq,
                   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
-       spin_lock_bh(&ip_vs_stats.lock);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
-                  ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
-                  (unsigned long long) ip_vs_stats.ustats.inbytes,
-                  (unsigned long long) ip_vs_stats.ustats.outbytes);
+       spin_lock_bh(&tot_stats->lock);
+       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
+                  tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
 
 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
                   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
        seq_printf(seq,"%8X %8X %8X %16X %16X\n",
-                       ip_vs_stats.ustats.cps,
-                       ip_vs_stats.ustats.inpps,
-                       ip_vs_stats.ustats.outpps,
-                       ip_vs_stats.ustats.inbps,
-                       ip_vs_stats.ustats.outbps);
-       spin_unlock_bh(&ip_vs_stats.lock);
+                       tot_stats->ustats.cps,
+                       tot_stats->ustats.inpps,
+                       tot_stats->ustats.outpps,
+                       tot_stats->ustats.inbps,
+                       tot_stats->ustats.outbps);
+       spin_unlock_bh(&tot_stats->lock);
 
        return 0;
 }
 
 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
 {
-       return single_open(file, ip_vs_stats_show, NULL);
+       return single_open_net(inode, file, ip_vs_stats_show);
 }
 
 static const struct file_operations ip_vs_stats_fops = {
@@ -1996,13 +2002,68 @@ static const struct file_operations ip_vs_stats_fops = {
        .release = single_release,
 };
 
+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
+{
+       struct net *net = seq_file_single_net(seq);
+       struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
+       int i;
+
+/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                "       Total Incoming Outgoing         Incoming         Outgoing\n");
+       seq_printf(seq,
+                  "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
+
+       for_each_possible_cpu(i) {
+               struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
+               seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
+                           i, u->ustats.conns, u->ustats.inpkts,
+                           u->ustats.outpkts, (__u64)u->ustats.inbytes,
+                           (__u64)u->ustats.outbytes);
+       }
+
+       spin_lock_bh(&tot_stats->lock);
+       seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
+                  tot_stats->ustats.conns, tot_stats->ustats.inpkts,
+                  tot_stats->ustats.outpkts,
+                  (unsigned long long) tot_stats->ustats.inbytes,
+                  (unsigned long long) tot_stats->ustats.outbytes);
+
+/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_puts(seq,
+                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+       seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
+                       tot_stats->ustats.cps,
+                       tot_stats->ustats.inpps,
+                       tot_stats->ustats.outpps,
+                       tot_stats->ustats.inbps,
+                       tot_stats->ustats.outbps);
+       spin_unlock_bh(&tot_stats->lock);
+
+       return 0;
+}
+
+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
+{
+       return single_open_net(inode, file, ip_vs_stats_percpu_show);
+}
+
+static const struct file_operations ip_vs_stats_percpu_fops = {
+       .owner = THIS_MODULE,
+       .open = ip_vs_stats_percpu_seq_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
 #endif
 
 /*
  *     Set timeout values for tcp tcpfin udp in the timeout_table.
  */
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
 {
+       struct ip_vs_proto_data *pd;
+
        IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
                  u->tcp_timeout,
                  u->tcp_fin_timeout,
@@ -2010,19 +2071,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
 
 #ifdef CONFIG_IP_VS_PROTO_TCP
        if (u->tcp_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
                        = u->tcp_timeout * HZ;
        }
 
        if (u->tcp_fin_timeout) {
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+               pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+               pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
                        = u->tcp_fin_timeout * HZ;
        }
 #endif
 
 #ifdef CONFIG_IP_VS_PROTO_UDP
        if (u->udp_timeout) {
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+               pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+               pd->timeout_table[IP_VS_UDP_S_NORMAL]
                        = u->udp_timeout * HZ;
        }
 #endif
@@ -2087,6 +2151,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
 static int
 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 {
+       struct net *net = sock_net(sk);
        int ret;
        unsigned char arg[MAX_ARG_LEN];
        struct ip_vs_service_user *usvc_compat;
@@ -2121,19 +2186,20 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
        if (cmd == IP_VS_SO_SET_FLUSH) {
                /* Flush the virtual service */
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
                /* Set timeout values for (tcp tcpfin udp) */
-               ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+               ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+               ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+                                       dm->syncid);
                goto out_unlock;
        } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
                struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
-               ret = stop_sync_thread(dm->state);
+               ret = stop_sync_thread(net, dm->state);
                goto out_unlock;
        }
 
@@ -2148,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
        if (cmd == IP_VS_SO_SET_ZERO) {
                /* if no service address is set, zero counters in all */
                if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
-                       ret = ip_vs_zero_all();
+                       ret = ip_vs_zero_all(net);
                        goto out_unlock;
                }
        }
@@ -2165,10 +2231,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 
        /* Lookup the exact service by <protocol, addr, port> or fwmark */
        if (usvc.fwmark == 0)
-               svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+               svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
                                           &usvc.addr, usvc.port);
        else
-               svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+               svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
 
        if (cmd != IP_VS_SO_SET_ADD
            && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2181,7 +2247,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
                if (svc != NULL)
                        ret = -EEXIST;
                else
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                break;
        case IP_VS_SO_SET_EDIT:
                ret = ip_vs_edit_service(svc, &usvc);
@@ -2241,7 +2307,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 }
 
 static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+                           const struct ip_vs_get_services *get,
                            struct ip_vs_get_services __user *uptr)
 {
        int idx, count=0;
@@ -2252,7 +2319,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
                        /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                continue;
 
                        if (count >= get->num_services)
@@ -2271,7 +2338,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
        for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
                        /* Only expose IPv4 entries to old interface */
-                       if (svc->af != AF_INET)
+                       if (svc->af != AF_INET || !net_eq(svc->net, net))
                                continue;
 
                        if (count >= get->num_services)
@@ -2291,7 +2358,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
 }
 
 static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                         struct ip_vs_get_dests __user *uptr)
 {
        struct ip_vs_service *svc;
@@ -2299,9 +2366,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
        int ret = 0;
 
        if (get->fwmark)
-               svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+               svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
        else
-               svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+               svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
                                           get->port);
 
        if (svc) {
@@ -2336,17 +2403,19 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
 }
 
 static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
 {
+       struct ip_vs_proto_data *pd;
+
 #ifdef CONFIG_IP_VS_PROTO_TCP
-       u->tcp_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
-       u->tcp_fin_timeout =
-               ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+       pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+       u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+       u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
 #endif
 #ifdef CONFIG_IP_VS_PROTO_UDP
+       pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
        u->udp_timeout =
-               ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+                       pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
 #endif
 }
 
@@ -2375,7 +2444,10 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
        unsigned char arg[128];
        int ret = 0;
        unsigned int copylen;
+       struct net *net = sock_net(sk);
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
+       BUG_ON(!net);
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
 
@@ -2418,7 +2490,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                struct ip_vs_getinfo info;
                info.version = IP_VS_VERSION_CODE;
                info.size = ip_vs_conn_tab_size;
-               info.num_services = ip_vs_num_services;
+               info.num_services = ipvs->num_services;
                if (copy_to_user(user, &info, sizeof(info)) != 0)
                        ret = -EFAULT;
        }
@@ -2437,7 +2509,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                        ret = -EINVAL;
                        goto out;
                }
-               ret = __ip_vs_get_service_entries(get, user);
+               ret = __ip_vs_get_service_entries(net, get, user);
        }
        break;
 
@@ -2450,10 +2522,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                entry = (struct ip_vs_service_entry *)arg;
                addr.ip = entry->addr;
                if (entry->fwmark)
-                       svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+                       svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
                else
-                       svc = __ip_vs_service_find(AF_INET, entry->protocol,
-                                                  &addr, entry->port);
+                       svc = __ip_vs_service_find(net, AF_INET,
+                                                  entry->protocol, &addr,
+                                                  entry->port);
                if (svc) {
                        ip_vs_copy_service(entry, svc);
                        if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2476,7 +2549,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                        ret = -EINVAL;
                        goto out;
                }
-               ret = __ip_vs_get_dest_entries(get, user);
+               ret = __ip_vs_get_dest_entries(net, get, user);
        }
        break;
 
@@ -2484,7 +2557,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
        {
                struct ip_vs_timeout_user t;
 
-               __ip_vs_get_timeouts(&t);
+               __ip_vs_get_timeouts(net, &t);
                if (copy_to_user(user, &t, sizeof(t)) != 0)
                        ret = -EFAULT;
        }
@@ -2495,15 +2568,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
                struct ip_vs_daemon_user d[2];
 
                memset(&d, 0, sizeof(d));
-               if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+               if (ipvs->sync_state & IP_VS_STATE_MASTER) {
                        d[0].state = IP_VS_STATE_MASTER;
-                       strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
-                       d[0].syncid = ip_vs_master_syncid;
+                       strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+                               sizeof(d[0].mcast_ifn));
+                       d[0].syncid = ipvs->master_syncid;
                }
-               if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+               if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
                        d[1].state = IP_VS_STATE_BACKUP;
-                       strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
-                       d[1].syncid = ip_vs_backup_syncid;
+                       strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+                               sizeof(d[1].mcast_ifn));
+                       d[1].syncid = ipvs->backup_syncid;
                }
                if (copy_to_user(user, &d, sizeof(d)) != 0)
                        ret = -EFAULT;
@@ -2542,6 +2617,7 @@ static struct genl_family ip_vs_genl_family = {
        .name           = IPVS_GENL_NAME,
        .version        = IPVS_GENL_VERSION,
        .maxattr        = IPVS_CMD_MAX,
+       .netnsok        = true,         /* Make ipvsadm to work on netns */
 };
 
 /* Policy used for first-level command attributes */
@@ -2696,11 +2772,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
        int idx = 0, i;
        int start = cb->args[0];
        struct ip_vs_service *svc;
+       struct net *net = skb_sknet(skb);
 
        mutex_lock(&__ip_vs_mutex);
        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
                list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
-                       if (++idx <= start)
+                       if (++idx <= start || !net_eq(svc->net, net))
                                continue;
                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                                idx--;
@@ -2711,7 +2788,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
 
        for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
                list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
-                       if (++idx <= start)
+                       if (++idx <= start || !net_eq(svc->net, net))
                                continue;
                        if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
                                idx--;
@@ -2727,7 +2804,8 @@ nla_put_failure:
        return skb->len;
 }
 
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+                                   struct ip_vs_service_user_kern *usvc,
                                    struct nlattr *nla, int full_entry,
                                    struct ip_vs_service **ret_svc)
 {
@@ -2770,9 +2848,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
        }
 
        if (usvc->fwmark)
-               svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+               svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
        else
-               svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+               svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
                                           &usvc->addr, usvc->port);
        *ret_svc = svc;
 
@@ -2809,13 +2887,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
        return 0;
 }
 
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+                                                    struct nlattr *nla)
 {
        struct ip_vs_service_user_kern usvc;
        struct ip_vs_service *svc;
        int ret;
 
-       ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+       ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
        return ret ? ERR_PTR(ret) : svc;
 }
 
@@ -2883,6 +2962,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
        struct ip_vs_service *svc;
        struct ip_vs_dest *dest;
        struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+       struct net *net = skb_sknet(skb);
 
        mutex_lock(&__ip_vs_mutex);
 
@@ -2891,7 +2971,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
                        IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
                goto out_err;
 
-       svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+
+       svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
        if (IS_ERR(svc) || svc == NULL)
                goto out_err;
 
@@ -3005,20 +3086,23 @@ nla_put_failure:
 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
                                   struct netlink_callback *cb)
 {
+       struct net *net = skb_net(skb);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
        mutex_lock(&__ip_vs_mutex);
-       if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+       if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
-                                          ip_vs_master_mcast_ifn,
-                                          ip_vs_master_syncid, cb) < 0)
+                                          ipvs->master_mcast_ifn,
+                                          ipvs->master_syncid, cb) < 0)
                        goto nla_put_failure;
 
                cb->args[0] = 1;
        }
 
-       if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+       if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
                if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
-                                          ip_vs_backup_mcast_ifn,
-                                          ip_vs_backup_syncid, cb) < 0)
+                                          ipvs->backup_mcast_ifn,
+                                          ipvs->backup_syncid, cb) < 0)
                        goto nla_put_failure;
 
                cb->args[1] = 1;
@@ -3030,31 +3114,33 @@ nla_put_failure:
        return skb->len;
 }
 
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
 {
        if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
              attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
              attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
                return -EINVAL;
 
-       return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+       return start_sync_thread(net,
+                                nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
                                 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
 }
 
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
 {
        if (!attrs[IPVS_DAEMON_ATTR_STATE])
                return -EINVAL;
 
-       return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+       return stop_sync_thread(net,
+                               nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
 }
 
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
 {
        struct ip_vs_timeout_user t;
 
-       __ip_vs_get_timeouts(&t);
+       __ip_vs_get_timeouts(net, &t);
 
        if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
                t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3066,7 +3152,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
        if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
                t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
 
-       return ip_vs_set_timeout(&t);
+       return ip_vs_set_timeout(net, &t);
 }
 
 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3076,16 +3162,20 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        struct ip_vs_dest_user_kern udest;
        int ret = 0, cmd;
        int need_full_svc = 0, need_full_dest = 0;
+       struct net *net;
+       struct netns_ipvs *ipvs;
 
+       net = skb_sknet(skb);
+       ipvs = net_ipvs(net);
        cmd = info->genlhdr->cmd;
 
        mutex_lock(&__ip_vs_mutex);
 
        if (cmd == IPVS_CMD_FLUSH) {
-               ret = ip_vs_flush();
+               ret = ip_vs_flush(net);
                goto out;
        } else if (cmd == IPVS_CMD_SET_CONFIG) {
-               ret = ip_vs_genl_set_config(info->attrs);
+               ret = ip_vs_genl_set_config(net, info->attrs);
                goto out;
        } else if (cmd == IPVS_CMD_NEW_DAEMON ||
                   cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3101,13 +3191,13 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
                }
 
                if (cmd == IPVS_CMD_NEW_DAEMON)
-                       ret = ip_vs_genl_new_daemon(daemon_attrs);
+                       ret = ip_vs_genl_new_daemon(net, daemon_attrs);
                else
-                       ret = ip_vs_genl_del_daemon(daemon_attrs);
+                       ret = ip_vs_genl_del_daemon(net, daemon_attrs);
                goto out;
        } else if (cmd == IPVS_CMD_ZERO &&
                   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
-               ret = ip_vs_zero_all();
+               ret = ip_vs_zero_all(net);
                goto out;
        }
 
@@ -3117,7 +3207,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
                need_full_svc = 1;
 
-       ret = ip_vs_genl_parse_service(&usvc,
+       ret = ip_vs_genl_parse_service(net, &usvc,
                                       info->attrs[IPVS_CMD_ATTR_SERVICE],
                                       need_full_svc, &svc);
        if (ret)
@@ -3147,7 +3237,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
        switch (cmd) {
        case IPVS_CMD_NEW_SERVICE:
                if (svc == NULL)
-                       ret = ip_vs_add_service(&usvc, &svc);
+                       ret = ip_vs_add_service(net, &usvc, &svc);
                else
                        ret = -EEXIST;
                break;
@@ -3185,7 +3275,11 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        struct sk_buff *msg;
        void *reply;
        int ret, cmd, reply_cmd;
+       struct net *net;
+       struct netns_ipvs *ipvs;
 
+       net = skb_sknet(skb);
+       ipvs = net_ipvs(net);
        cmd = info->genlhdr->cmd;
 
        if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3214,7 +3308,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        {
                struct ip_vs_service *svc;
 
-               svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+               svc = ip_vs_genl_find_service(net,
+                                             info->attrs[IPVS_CMD_ATTR_SERVICE]);
                if (IS_ERR(svc)) {
                        ret = PTR_ERR(svc);
                        goto out_err;
@@ -3234,7 +3329,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
        {
                struct ip_vs_timeout_user t;
 
-               __ip_vs_get_timeouts(&t);
+               __ip_vs_get_timeouts(net, &t);
 #ifdef CONFIG_IP_VS_PROTO_TCP
                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
                NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
@@ -3380,62 +3475,172 @@ static void ip_vs_genl_unregister(void)
 
 /* End of Generic Netlink interface definitions */
 
+/*
+ * per netns intit/exit func.
+ */
+int __net_init __ip_vs_control_init(struct net *net)
+{
+       int idx;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ctl_table *tbl;
+
+       atomic_set(&ipvs->dropentry, 0);
+       spin_lock_init(&ipvs->dropentry_lock);
+       spin_lock_init(&ipvs->droppacket_lock);
+       spin_lock_init(&ipvs->securetcp_lock);
+       ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+       /* Initialize rs_table */
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+       INIT_LIST_HEAD(&ipvs->dest_trash);
+       atomic_set(&ipvs->ftpsvc_counter, 0);
+       atomic_set(&ipvs->nullsvc_counter, 0);
+
+       /* procfs stats */
+       ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
+       if (ipvs->tot_stats == NULL) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
+       if (!ipvs->cpustats) {
+               pr_err("%s() alloc_percpu failed\n", __func__);
+               goto err_alloc;
+       }
+       spin_lock_init(&ipvs->tot_stats->lock);
+
+       for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+               INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
+       proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
+       proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
+       proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
+                            &ip_vs_stats_percpu_fops);
+
+       if (!net_eq(net, &init_net)) {
+               tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+               if (tbl == NULL)
+                       goto err_dup;
+       } else
+               tbl = vs_vars;
+       /* Initialize sysctl defaults */
+       idx = 0;
+       ipvs->sysctl_amemthresh = 1024;
+       tbl[idx++].data = &ipvs->sysctl_amemthresh;
+       ipvs->sysctl_am_droprate = 10;
+       tbl[idx++].data = &ipvs->sysctl_am_droprate;
+       tbl[idx++].data = &ipvs->sysctl_drop_entry;
+       tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+       tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+       tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+       ipvs->sysctl_snat_reroute = 1;
+       tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+       ipvs->sysctl_sync_ver = 1;
+       tbl[idx++].data = &ipvs->sysctl_sync_ver;
+       tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+       tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+       tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+       ipvs->sysctl_sync_threshold[0] = 3;
+       ipvs->sysctl_sync_threshold[1] = 50;
+       tbl[idx].data = &ipvs->sysctl_sync_threshold;
+       tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+       tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+       ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
+                                                 vs_vars);
+       if (ipvs->sysctl_hdr == NULL)
+               goto err_reg;
+       ip_vs_new_estimator(net, ipvs->tot_stats);
+       ipvs->sysctl_tbl = tbl;
+       /* Schedule defense work */
+       INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+       schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
+       return 0;
+
+err_reg:
+       if (!net_eq(net, &init_net))
+               kfree(tbl);
+err_dup:
+       free_percpu(ipvs->cpustats);
+err_alloc:
+       kfree(ipvs->tot_stats);
+       return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_control_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_trash_cleanup(net);
+       ip_vs_kill_estimator(net, ipvs->tot_stats);
+       cancel_delayed_work_sync(&ipvs->defense_work);
+       cancel_work_sync(&ipvs->defense_work.work);
+       unregister_net_sysctl_table(ipvs->sysctl_hdr);
+       proc_net_remove(net, "ip_vs_stats_percpu");
+       proc_net_remove(net, "ip_vs_stats");
+       proc_net_remove(net, "ip_vs");
+       free_percpu(ipvs->cpustats);
+       kfree(ipvs->tot_stats);
+}
+
+static struct pernet_operations ipvs_control_ops = {
+       .init = __ip_vs_control_init,
+       .exit = __ip_vs_control_cleanup,
+};
 
 int __init ip_vs_control_init(void)
 {
-       int ret;
        int idx;
+       int ret;
 
        EnterFunction(2);
 
-       /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+       /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
        for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
                INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
                INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
        }
-       for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
-               INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+       ret = register_pernet_subsys(&ipvs_control_ops);
+       if (ret) {
+               pr_err("cannot register namespace.\n");
+               goto err;
        }
-       smp_wmb();
+
+       smp_wmb();      /* Do we really need it now ? */
 
        ret = nf_register_sockopt(&ip_vs_sockopts);
        if (ret) {
                pr_err("cannot register sockopt.\n");
-               return ret;
+               goto err_net;
        }
 
        ret = ip_vs_genl_register();
        if (ret) {
                pr_err("cannot register Generic Netlink interface.\n");
                nf_unregister_sockopt(&ip_vs_sockopts);
-               return ret;
+               goto err_net;
        }
 
-       proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
-       proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
-
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
-
-       ip_vs_new_estimator(&ip_vs_stats);
-
-       /* Hook the defense timer */
-       schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
        LeaveFunction(2);
        return 0;
+
+err_net:
+       unregister_pernet_subsys(&ipvs_control_ops);
+err:
+       return ret;
 }
 
 
 void ip_vs_control_cleanup(void)
 {
        EnterFunction(2);
-       ip_vs_trash_cleanup();
-       cancel_delayed_work_sync(&defense_work);
-       cancel_work_sync(&defense_work.work);
-       ip_vs_kill_estimator(&ip_vs_stats);
-       unregister_sysctl_table(sysctl_header);
-       proc_net_remove(&init_net, "ip_vs_stats");
-       proc_net_remove(&init_net, "ip_vs");
+       unregister_pernet_subsys(&ipvs_control_ops);
        ip_vs_genl_unregister();
        nf_unregister_sockopt(&ip_vs_sockopts);
        LeaveFunction(2);
index ff28801..f560a05 100644 (file)
@@ -8,8 +8,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
- *
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              Affected data: est_list and est_lock.
+ *              estimation_timer() runs with timer per netns.
+ *              get_stats()) do the per cpu summing.
  */
 
 #define KMSG_COMPONENT "IPVS"
  */
 
 
-static void estimation_timer(unsigned long arg);
+/*
+ * Make a summary from each cpu
+ */
+static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+                                struct ip_vs_cpu_stats *stats)
+{
+       int i;
+
+       for_each_possible_cpu(i) {
+               struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
+               unsigned int start;
+               __u64 inbytes, outbytes;
+               if (i) {
+                       sum->conns += s->ustats.conns;
+                       sum->inpkts += s->ustats.inpkts;
+                       sum->outpkts += s->ustats.outpkts;
+                       do {
+                               start = u64_stats_fetch_begin_bh(&s->syncp);
+                               inbytes = s->ustats.inbytes;
+                               outbytes = s->ustats.outbytes;
+                       } while (u64_stats_fetch_retry_bh(&s->syncp, start));
+                       sum->inbytes += inbytes;
+                       sum->outbytes += outbytes;
+               } else {
+                       sum->conns = s->ustats.conns;
+                       sum->inpkts = s->ustats.inpkts;
+                       sum->outpkts = s->ustats.outpkts;
+                       do {
+                               start = u64_stats_fetch_begin_bh(&s->syncp);
+                               sum->inbytes = s->ustats.inbytes;
+                               sum->outbytes = s->ustats.outbytes;
+                       } while (u64_stats_fetch_retry_bh(&s->syncp, start));
+               }
+       }
+}
 
-static LIST_HEAD(est_list);
-static DEFINE_SPINLOCK(est_lock);
-static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
 
 static void estimation_timer(unsigned long arg)
 {
@@ -62,11 +97,16 @@ static void estimation_timer(unsigned long arg)
        u32 n_inpkts, n_outpkts;
        u64 n_inbytes, n_outbytes;
        u32 rate;
+       struct net *net = (struct net *)arg;
+       struct netns_ipvs *ipvs;
 
-       spin_lock(&est_lock);
-       list_for_each_entry(e, &est_list, list) {
+       ipvs = net_ipvs(net);
+       ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
+       spin_lock(&ipvs->est_lock);
+       list_for_each_entry(e, &ipvs->est_list, list) {
                s = container_of(e, struct ip_vs_stats, est);
 
+               ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
                spin_lock(&s->lock);
                n_conns = s->ustats.conns;
                n_inpkts = s->ustats.inpkts;
@@ -75,38 +115,39 @@ static void estimation_timer(unsigned long arg)
                n_outbytes = s->ustats.outbytes;
 
                /* scaled by 2^10, but divided 2 seconds */
-               rate = (n_conns - e->last_conns)<<9;
+               rate = (n_conns - e->last_conns) << 9;
                e->last_conns = n_conns;
-               e->cps += ((long)rate - (long)e->cps)>>2;
-               s->ustats.cps = (e->cps+0x1FF)>>10;
+               e->cps += ((long)rate - (long)e->cps) >> 2;
+               s->ustats.cps = (e->cps + 0x1FF) >> 10;
 
-               rate = (n_inpkts - e->last_inpkts)<<9;
+               rate = (n_inpkts - e->last_inpkts) << 9;
                e->last_inpkts = n_inpkts;
-               e->inpps += ((long)rate - (long)e->inpps)>>2;
-               s->ustats.inpps = (e->inpps+0x1FF)>>10;
+               e->inpps += ((long)rate - (long)e->inpps) >> 2;
+               s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
 
-               rate = (n_outpkts - e->last_outpkts)<<9;
+               rate = (n_outpkts - e->last_outpkts) << 9;
                e->last_outpkts = n_outpkts;
-               e->outpps += ((long)rate - (long)e->outpps)>>2;
-               s->ustats.outpps = (e->outpps+0x1FF)>>10;
+               e->outpps += ((long)rate - (long)e->outpps) >> 2;
+               s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
 
-               rate = (n_inbytes - e->last_inbytes)<<4;
+               rate = (n_inbytes - e->last_inbytes) << 4;
                e->last_inbytes = n_inbytes;
-               e->inbps += ((long)rate - (long)e->inbps)>>2;
-               s->ustats.inbps = (e->inbps+0xF)>>5;
+               e->inbps += ((long)rate - (long)e->inbps) >> 2;
+               s->ustats.inbps = (e->inbps + 0xF) >> 5;
 
-               rate = (n_outbytes - e->last_outbytes)<<4;
+               rate = (n_outbytes - e->last_outbytes) << 4;
                e->last_outbytes = n_outbytes;
-               e->outbps += ((long)rate - (long)e->outbps)>>2;
-               s->ustats.outbps = (e->outbps+0xF)>>5;
+               e->outbps += ((long)rate - (long)e->outbps) >> 2;
+               s->ustats.outbps = (e->outbps + 0xF) >> 5;
                spin_unlock(&s->lock);
        }
-       spin_unlock(&est_lock);
-       mod_timer(&est_timer, jiffies + 2*HZ);
+       spin_unlock(&ipvs->est_lock);
+       mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
-void ip_vs_new_estimator(struct ip_vs_stats *stats)
+void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_estimator *est = &stats->est;
 
        INIT_LIST_HEAD(&est->list);
@@ -126,18 +167,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
        est->last_outbytes = stats->ustats.outbytes;
        est->outbps = stats->ustats.outbps<<5;
 
-       spin_lock_bh(&est_lock);
-       list_add(&est->list, &est_list);
-       spin_unlock_bh(&est_lock);
+       spin_lock_bh(&ipvs->est_lock);
+       list_add(&est->list, &ipvs->est_list);
+       spin_unlock_bh(&ipvs->est_lock);
 }
 
-void ip_vs_kill_estimator(struct ip_vs_stats *stats)
+void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct ip_vs_estimator *est = &stats->est;
 
-       spin_lock_bh(&est_lock);
+       spin_lock_bh(&ipvs->est_lock);
        list_del(&est->list);
-       spin_unlock_bh(&est_lock);
+       spin_unlock_bh(&ipvs->est_lock);
 }
 
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
@@ -157,13 +199,35 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
        est->outbps = 0;
 }
 
-int __init ip_vs_estimator_init(void)
+static int __net_init __ip_vs_estimator_init(struct net *net)
 {
-       mod_timer(&est_timer, jiffies + 2 * HZ);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->est_list);
+       spin_lock_init(&ipvs->est_lock);
+       setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+       mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
        return 0;
 }
 
+static void __net_exit __ip_vs_estimator_exit(struct net *net)
+{
+       del_timer_sync(&net_ipvs(net)->est_timer);
+}
+static struct pernet_operations ip_vs_app_ops = {
+       .init = __ip_vs_estimator_init,
+       .exit = __ip_vs_estimator_exit,
+};
+
+int __init ip_vs_estimator_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_app_ops);
+       return rv;
+}
+
 void ip_vs_estimator_cleanup(void)
 {
-       del_timer_sync(&est_timer);
+       unregister_pernet_subsys(&ip_vs_app_ops);
 }
index 7545500..6b5dd6d 100644 (file)
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
        int ret = 0;
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
+       struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
        /* This application helper doesn't work with IPv6 yet,
@@ -197,18 +198,20 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                 */
                {
                        struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(AF_INET, iph->protocol,
-                                             &from, port, &cp->caddr, 0, &p);
+                       ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+                                             iph->protocol, &from, port,
+                                             &cp->caddr, 0, &p);
                        n_cp = ip_vs_conn_out_get(&p);
                }
                if (!n_cp) {
                        struct ip_vs_conn_param p;
-                       ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+                       ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+                                             AF_INET, IPPROTO_TCP, &cp->caddr,
                                              0, &cp->vaddr, port, &p);
                        n_cp = ip_vs_conn_new(&p, &from, port,
                                              IP_VS_CONN_F_NO_CPORT |
                                              IP_VS_CONN_F_NFCT,
-                                             cp->dest);
+                                             cp->dest, skb->mark);
                        if (!n_cp)
                                return 0;
 
@@ -257,8 +260,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
                 * would be adjusted twice.
                 */
 
+               net = skb_net(skb);
                cp->app_data = NULL;
-               ip_vs_tcp_conn_listen(n_cp);
+               ip_vs_tcp_conn_listen(net, n_cp);
                ip_vs_conn_put(n_cp);
                return ret;
        }
@@ -287,6 +291,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
        union nf_inet_addr to;
        __be16 port;
        struct ip_vs_conn *n_cp;
+       struct net *net;
 
 #ifdef CONFIG_IP_VS_IPV6
        /* This application helper doesn't work with IPv6 yet,
@@ -358,14 +363,15 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 
        {
                struct ip_vs_conn_param p;
-               ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
-                                     &cp->vaddr, htons(ntohs(cp->vport)-1),
-                                     &p);
+               ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+                                     iph->protocol, &to, port, &cp->vaddr,
+                                     htons(ntohs(cp->vport)-1), &p);
                n_cp = ip_vs_conn_in_get(&p);
                if (!n_cp) {
                        n_cp = ip_vs_conn_new(&p, &cp->daddr,
                                              htons(ntohs(cp->dport)-1),
-                                             IP_VS_CONN_F_NFCT, cp->dest);
+                                             IP_VS_CONN_F_NFCT, cp->dest,
+                                             skb->mark);
                        if (!n_cp)
                                return 0;
 
@@ -377,7 +383,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
        /*
         *      Move tunnel to listen state
         */
-       ip_vs_tcp_conn_listen(n_cp);
+       net = skb_net(skb);
+       ip_vs_tcp_conn_listen(net, n_cp);
        ip_vs_conn_put(n_cp);
 
        return 1;
@@ -398,23 +405,22 @@ static struct ip_vs_app ip_vs_ftp = {
        .pkt_in =       ip_vs_ftp_in,
 };
 
-
 /*
- *     ip_vs_ftp initialization
+ *     per netns ip_vs_ftp initialization
  */
-static int __init ip_vs_ftp_init(void)
+static int __net_init __ip_vs_ftp_init(struct net *net)
 {
        int i, ret;
        struct ip_vs_app *app = &ip_vs_ftp;
 
-       ret = register_ip_vs_app(app);
+       ret = register_ip_vs_app(net, app);
        if (ret)
                return ret;
 
        for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
                if (!ports[i])
                        continue;
-               ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+               ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
                if (ret)
                        break;
                pr_info("%s: loaded support on port[%d] = %d\n",
@@ -422,18 +428,39 @@ static int __init ip_vs_ftp_init(void)
        }
 
        if (ret)
-               unregister_ip_vs_app(app);
+               unregister_ip_vs_app(net, app);
 
        return ret;
 }
+/*
+ *     netns exit
+ */
+static void __ip_vs_ftp_exit(struct net *net)
+{
+       struct ip_vs_app *app = &ip_vs_ftp;
+
+       unregister_ip_vs_app(net, app);
+}
+
+static struct pernet_operations ip_vs_ftp_ops = {
+       .init = __ip_vs_ftp_init,
+       .exit = __ip_vs_ftp_exit,
+};
 
+int __init ip_vs_ftp_init(void)
+{
+       int rv;
+
+       rv = register_pernet_subsys(&ip_vs_ftp_ops);
+       return rv;
+}
 
 /*
  *     ip_vs_ftp finish.
  */
 static void __exit ip_vs_ftp_exit(void)
 {
-       unregister_ip_vs_app(&ip_vs_ftp);
+       unregister_pernet_subsys(&ip_vs_ftp_ops);
 }
 
 
index 9323f89..d5bec33 100644 (file)
@@ -70,7 +70,6 @@
  *    entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
 
 
 /*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
 static ctl_table vs_vars_table[] = {
        {
                .procname       = "lblc_expiration",
-               .data           = &sysctl_ip_vs_lblc_expiration,
+               .data           = NULL,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
        { }
 };
 
-static struct ctl_table_header * sysctl_header;
-
 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
        list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
        struct ip_vs_lblc_entry *en, *nxt;
        unsigned long now = jiffies;
        int i, j;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_before(now,
-                                       en->lastuse + sysctl_ip_vs_lblc_expiration))
+                                       en->lastuse +
+                                       ipvs->sysctl_lblc_expiration))
                                continue;
 
                        ip_vs_lblc_free(en);
@@ -543,23 +542,73 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
        .schedule =             ip_vs_lblc_schedule,
 };
 
+/*
+ *  per netns init.
+ */
+static int __net_init __ip_vs_lblc_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       if (!net_eq(net, &init_net)) {
+               ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+                                               sizeof(vs_vars_table),
+                                               GFP_KERNEL);
+               if (ipvs->lblc_ctl_table == NULL)
+                       goto err_dup;
+       } else
+               ipvs->lblc_ctl_table = vs_vars_table;
+       ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+       ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+       ipvs->lblc_ctl_header =
+               register_net_sysctl_table(net, net_vs_ctl_path,
+                                         ipvs->lblc_ctl_table);
+       if (!ipvs->lblc_ctl_header)
+               goto err_reg;
+
+       return 0;
+
+err_reg:
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->lblc_ctl_table);
+
+err_dup:
+       return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblc_exit(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       unregister_net_sysctl_table(ipvs->lblc_ctl_header);
+
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->lblc_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblc_ops = {
+       .init = __ip_vs_lblc_init,
+       .exit = __ip_vs_lblc_exit,
+};
 
 static int __init ip_vs_lblc_init(void)
 {
        int ret;
 
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+       ret = register_pernet_subsys(&ip_vs_lblc_ops);
+       if (ret)
+               return ret;
+
        ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
        if (ret)
-               unregister_sysctl_table(sysctl_header);
+               unregister_pernet_subsys(&ip_vs_lblc_ops);
        return ret;
 }
 
-
 static void __exit ip_vs_lblc_cleanup(void)
 {
-       unregister_sysctl_table(sysctl_header);
        unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
+       unregister_pernet_subsys(&ip_vs_lblc_ops);
 }
 
 
index dbeed8e..61ae8cf 100644 (file)
@@ -70,8 +70,6 @@
  *    entries that haven't been touched for a day.
  */
 #define COUNT_FOR_FULL_EXPIRATION   30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
 
 /*
  *     for IPVS lblcr entry hash table
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table {
 static ctl_table vs_vars_table[] = {
        {
                .procname       = "lblcr_expiration",
-               .data           = &sysctl_ip_vs_lblcr_expiration,
+               .data           = NULL,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = {
        { }
 };
 
-static struct ctl_table_header * sysctl_header;
-
 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
 {
        list_del(&en->list);
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
        unsigned long now = jiffies;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;
+       struct netns_ipvs *ipvs = net_ipvs(svc->net);
 
        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
 
                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
-                       if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
-                                      now))
+                       if (time_after(en->lastuse
+                                       + ipvs->sysctl_lblcr_expiration, now))
                                continue;
 
                        ip_vs_lblcr_free(en);
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
        read_lock(&svc->sched_lock);
        en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
        if (en) {
+               struct netns_ipvs *ipvs = net_ipvs(svc->net);
                /* We only hold a read lock, but this is atomic */
                en->lastuse = jiffies;
 
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
                /* More than one destination + enough time passed by, cleanup */
                if (atomic_read(&en->set.size) > 1 &&
                                time_after(jiffies, en->set.lastmod +
-                               sysctl_ip_vs_lblcr_expiration)) {
+                               ipvs->sysctl_lblcr_expiration)) {
                        struct ip_vs_dest *m;
 
                        write_lock(&en->set.lock);
@@ -744,23 +742,73 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
        .schedule =             ip_vs_lblcr_schedule,
 };
 
+/*
+ *  per netns init.
+ */
+static int __net_init __ip_vs_lblcr_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       if (!net_eq(net, &init_net)) {
+               ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+                                               sizeof(vs_vars_table),
+                                               GFP_KERNEL);
+               if (ipvs->lblcr_ctl_table == NULL)
+                       goto err_dup;
+       } else
+               ipvs->lblcr_ctl_table = vs_vars_table;
+       ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+       ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+       ipvs->lblcr_ctl_header =
+               register_net_sysctl_table(net, net_vs_ctl_path,
+                                         ipvs->lblcr_ctl_table);
+       if (!ipvs->lblcr_ctl_header)
+               goto err_reg;
+
+       return 0;
+
+err_reg:
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->lblcr_ctl_table);
+
+err_dup:
+       return -ENOMEM;
+}
+
+static void __net_exit __ip_vs_lblcr_exit(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
+
+       if (!net_eq(net, &init_net))
+               kfree(ipvs->lblcr_ctl_table);
+}
+
+static struct pernet_operations ip_vs_lblcr_ops = {
+       .init = __ip_vs_lblcr_init,
+       .exit = __ip_vs_lblcr_exit,
+};
 
 static int __init ip_vs_lblcr_init(void)
 {
        int ret;
 
-       sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
+       ret = register_pernet_subsys(&ip_vs_lblcr_ops);
+       if (ret)
+               return ret;
+
        ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
        if (ret)
-               unregister_sysctl_table(sysctl_header);
+               unregister_pernet_subsys(&ip_vs_lblcr_ops);
        return ret;
 }
 
-
 static void __exit ip_vs_lblcr_cleanup(void)
 {
-       unregister_sysctl_table(sysctl_header);
        unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+       unregister_pernet_subsys(&ip_vs_lblcr_ops);
 }
 
 
index 4680647..f454c80 100644 (file)
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
        struct nf_conntrack_tuple *orig, new_reply;
        struct ip_vs_conn *cp;
        struct ip_vs_conn_param p;
+       struct net *net = nf_ct_net(ct);
 
        if (exp->tuple.src.l3num != PF_INET)
                return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
 
        /* RS->CLIENT */
        orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-       ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+       ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
                              &orig->src.u3, orig->src.u.tcp.port,
                              &orig->dst.u3, orig->dst.u.tcp.port, &p);
        cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
                " for conn " FMT_CONN "\n",
                __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
 
-       h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+       h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+                                 &tuple);
        if (h) {
                ct = nf_ct_tuplehash_to_ctrack(h);
                /* Show what happens instead of calling nf_ct_kill() */
index 3414af7..5cf859c 100644 (file)
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
 }
 
 /* Get pe in the pe list by name */
-static struct ip_vs_pe *
-ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
 {
        struct ip_vs_pe *pe;
 
-       IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+       IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
                  pe_name);
 
        spin_lock_bh(&ip_vs_pe_lock);
@@ -60,28 +59,22 @@ ip_vs_pe_getbyname(const char *pe_name)
 }
 
 /* Lookup pe and try to load it if it doesn't exist */
-struct ip_vs_pe *ip_vs_pe_get(const char *name)
+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)
 {
        struct ip_vs_pe *pe;
 
        /* Search for the pe by name */
-       pe = ip_vs_pe_getbyname(name);
+       pe = __ip_vs_pe_getbyname(name);
 
        /* If pe not found, load the module and search again */
        if (!pe) {
                request_module("ip_vs_pe_%s", name);
-               pe = ip_vs_pe_getbyname(name);
+               pe = __ip_vs_pe_getbyname(name);
        }
 
        return pe;
 }
 
-void ip_vs_pe_put(struct ip_vs_pe *pe)
-{
-       if (pe && pe->module)
-               module_put(pe->module);
-}
-
 /* Register a pe in the pe list */
 int register_ip_vs_pe(struct ip_vs_pe *pe)
 {
index b8b4e96..0d83bc0 100644 (file)
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
        struct ip_vs_iphdr iph;
        unsigned int dataoff, datalen, matchoff, matchlen;
        const char *dptr;
+       int retc;
 
        ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
 
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
        if (dataoff >= skb->len)
                return -EINVAL;
 
+       if ((retc=skb_linearize(skb)) < 0)
+               return retc;
        dptr = skb->data + dataoff;
        datalen = skb->len - dataoff;
 
index c539983..6ac986c 100644 (file)
@@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
        return 0;
 }
 
+/*
+ *     register an ipvs protocols netns related data
+ */
+static int
+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
+       struct ip_vs_proto_data *pd =
+                       kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+
+       if (!pd) {
+               pr_err("%s(): no memory.\n", __func__);
+               return -ENOMEM;
+       }
+       pd->pp = pp;    /* For speed issues */
+       pd->next = ipvs->proto_data_table[hash];
+       ipvs->proto_data_table[hash] = pd;
+       atomic_set(&pd->appcnt, 0);     /* Init app counter */
+
+       if (pp->init_netns != NULL)
+               pp->init_netns(net, pd);
+
+       return 0;
+}
 
 /*
  *     unregister an ipvs protocol
@@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
        return -ESRCH;
 }
 
+/*
+ *     unregister an ipvs protocols netns data
+ */
+static int
+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data **pd_p;
+       unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);
+
+       pd_p = &ipvs->proto_data_table[hash];
+       for (; *pd_p; pd_p = &(*pd_p)->next) {
+               if (*pd_p == pd) {
+                       *pd_p = pd->next;
+                       if (pd->pp->exit_netns != NULL)
+                               pd->pp->exit_netns(net, pd);
+                       kfree(pd);
+                       return 0;
+               }
+       }
+
+       return -ESRCH;
+}
 
 /*
  *     get ip_vs_protocol object by its proto.
@@ -100,19 +148,44 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)
 }
 EXPORT_SYMBOL(ip_vs_proto_get);
 
+/*
+ *     get ip_vs_protocol object data by netns and proto
+ */
+struct ip_vs_proto_data *
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+{
+       struct ip_vs_proto_data *pd;
+       unsigned hash = IP_VS_PROTO_HASH(proto);
+
+       for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {
+               if (pd->pp->protocol == proto)
+                       return pd;
+       }
+
+       return NULL;
+}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       return __ipvs_proto_data_get(ipvs, proto);
+}
+EXPORT_SYMBOL(ip_vs_proto_data_get);
 
 /*
  *     Propagate event for state change to all protocols
  */
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
 {
-       struct ip_vs_protocol *pp;
+       struct ip_vs_proto_data *pd;
        int i;
 
        for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
-               for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
-                       if (pp->timeout_change)
-                               pp->timeout_change(pp, flags);
+               for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+                       if (pd->pp->timeout_change)
+                               pd->pp->timeout_change(pd, flags);
                }
        }
 }
@@ -236,6 +309,46 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
                ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
 }
 
+/*
+ * per network name-space init
+ */
+static int __net_init __ip_vs_protocol_init(struct net *net)
+{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+       register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+       register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_SCTP
+       register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+       register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+       register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
+       return 0;
+}
+
+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd;
+       int i;
+
+       /* unregister all the ipvs proto data for this netns */
+       for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+               while ((pd = ipvs->proto_data_table[i]) != NULL)
+                       unregister_ip_vs_proto_netns(net, pd);
+       }
+}
+
+static struct pernet_operations ipvs_proto_ops = {
+       .init = __ip_vs_protocol_init,
+       .exit = __ip_vs_protocol_cleanup,
+};
 
 int __init ip_vs_protocol_init(void)
 {
@@ -265,6 +378,7 @@ int __init ip_vs_protocol_init(void)
        REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
        pr_info("Registered protocols (%s)\n", &protocols[2]);
+       return register_pernet_subsys(&ipvs_proto_ops);
 
        return 0;
 }
@@ -275,6 +389,7 @@ void ip_vs_protocol_cleanup(void)
        struct ip_vs_protocol *pp;
        int i;
 
+       unregister_pernet_subsys(&ipvs_proto_ops);
        /* unregister all the ipvs protocols */
        for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
                while ((pp = ip_vs_proto_table[i]) != NULL)
index 3a04611..5b8eb8b 100644 (file)
@@ -41,28 +41,30 @@ struct isakmp_hdr {
 #define PORT_ISAKMP    500
 
 static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
-                            int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+                            const struct ip_vs_iphdr *iph, int inverse,
+                            struct ip_vs_conn_param *p)
 {
        if (likely(!inverse))
-               ip_vs_conn_fill_param(af, IPPROTO_UDP,
+               ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
                                      &iph->saddr, htons(PORT_ISAKMP),
                                      &iph->daddr, htons(PORT_ISAKMP), p);
        else
-               ip_vs_conn_fill_param(af, IPPROTO_UDP,
+               ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
                                      &iph->daddr, htons(PORT_ISAKMP),
                                      &iph->saddr, htons(PORT_ISAKMP), p);
 }
 
 static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
                   const struct ip_vs_iphdr *iph, unsigned int proto_off,
                   int inverse)
 {
        struct ip_vs_conn *cp;
        struct ip_vs_conn_param p;
+       struct net *net = skb_net(skb);
 
-       ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+       ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
        cp = ip_vs_conn_in_get(&p);
        if (!cp) {
                /*
@@ -72,7 +74,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
                IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
                              "%s%s %s->%s\n",
                              inverse ? "ICMP+" : "",
-                             pp->name,
+                             ip_vs_proto_get(iph->protocol)->name,
                              IP_VS_DBG_ADDR(af, &iph->saddr),
                              IP_VS_DBG_ADDR(af, &iph->daddr));
        }
@@ -83,21 +85,21 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
 
 static struct ip_vs_conn *
 ah_esp_conn_out_get(int af, const struct sk_buff *skb,
-                   struct ip_vs_protocol *pp,
                    const struct ip_vs_iphdr *iph,
                    unsigned int proto_off,
                    int inverse)
 {
        struct ip_vs_conn *cp;
        struct ip_vs_conn_param p;
+       struct net *net = skb_net(skb);
 
-       ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+       ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
        cp = ip_vs_conn_out_get(&p);
        if (!cp) {
                IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
                              "%s%s %s->%s\n",
                              inverse ? "ICMP+" : "",
-                             pp->name,
+                             ip_vs_proto_get(iph->protocol)->name,
                              IP_VS_DBG_ADDR(af, &iph->saddr),
                              IP_VS_DBG_ADDR(af, &iph->daddr));
        }
@@ -107,7 +109,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
 
 
 static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                     int *verdict, struct ip_vs_conn **cpp)
 {
        /*
@@ -117,26 +119,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
        return 0;
 }
 
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
-       /* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
-       /* nothing to do now */
-}
-
-
 #ifdef CONFIG_IP_VS_PROTO_AH
 struct ip_vs_protocol ip_vs_protocol_ah = {
        .name =                 "AH",
        .protocol =             IPPROTO_AH,
        .num_states =           1,
        .dont_defrag =          1,
-       .init =                 ah_esp_init,
-       .exit =                 ah_esp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
        .conn_schedule =        ah_esp_conn_schedule,
        .conn_in_get =          ah_esp_conn_in_get,
        .conn_out_get =         ah_esp_conn_out_get,
@@ -149,7 +139,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
        .app_conn_bind =        NULL,
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        .timeout_change =       NULL,           /* ISAKMP */
-       .set_state_timeout =    NULL,
 };
 #endif
 
@@ -159,8 +148,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
        .protocol =             IPPROTO_ESP,
        .num_states =           1,
        .dont_defrag =          1,
-       .init =                 ah_esp_init,
-       .exit =                 ah_esp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
        .conn_schedule =        ah_esp_conn_schedule,
        .conn_in_get =          ah_esp_conn_in_get,
        .conn_out_get =         ah_esp_conn_out_get,
index 1ea96bc..fb2d04a 100644 (file)
@@ -9,9 +9,10 @@
 #include <net/ip_vs.h>
 
 static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                   int *verdict, struct ip_vs_conn **cpp)
 {
+       struct net *net;
        struct ip_vs_service *svc;
        sctp_chunkhdr_t _schunkh, *sch;
        sctp_sctphdr_t *sh, _sctph;
@@ -27,13 +28,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                                 sizeof(_schunkh), &_schunkh);
        if (sch == NULL)
                return 0;
-
+       net = skb_net(skb);
        if ((sch->type == SCTP_CID_INIT) &&
-           (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+           (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
                                     &iph.daddr, sh->dest))) {
                int ignored;
 
-               if (ip_vs_todrop()) {
+               if (ip_vs_todrop(net_ipvs(net))) {
                        /*
                         * It seems that we are very loaded.
                         * We have to drop this packet :(
@@ -46,14 +47,19 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 * Let the virtual server select a real server for the
                 * incoming connection, and create a connection entry.
                 */
-               *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pd);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                        return 0;
                }
                ip_vs_service_put(svc);
        }
-
+       /* NF_ACCEPT */
        return 1;
 }
 
@@ -856,7 +862,7 @@ static struct ipvs_sctp_nextstate
 /*
  *      Timeout table[state]
  */
-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
        [IP_VS_SCTP_S_NONE]         =     2 * HZ,
        [IP_VS_SCTP_S_INIT_CLI]     =     1 * 60 * HZ,
        [IP_VS_SCTP_S_INIT_SER]     =     1 * 60 * HZ,
@@ -900,20 +906,8 @@ static const char *sctp_state_name(int state)
        return "?";
 }
 
-static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)
-{
-}
-
-static int
-sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-
-return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,
-                               sctp_state_name_table, sname, to);
-}
-
 static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
                int direction, const struct sk_buff *skb)
 {
        sctp_chunkhdr_t _sctpch, *sch;
@@ -971,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
                IP_VS_DBG_BUF(8, "%s %s  %s:%d->"
                                "%s:%d state: %s->%s conn->refcnt:%d\n",
-                               pp->name,
+                               pd->pp->name,
                                ((direction == IP_VS_DIR_OUTPUT) ?
                                 "output " : "input "),
                                IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -995,75 +989,73 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
                        }
                }
        }
+       if (likely(pd))
+               cp->timeout = pd->timeout_table[cp->state = next_state];
+       else    /* What to do ? */
+               cp->timeout = sctp_timeouts[cp->state = next_state];
 
-        cp->timeout = pp->timeout_table[cp->state = next_state];
-
-        return 1;
+       return 1;
 }
 
 static int
 sctp_state_transition(struct ip_vs_conn *cp, int direction,
-               const struct sk_buff *skb, struct ip_vs_protocol *pp)
+               const struct sk_buff *skb, struct ip_vs_proto_data *pd)
 {
        int ret = 0;
 
        spin_lock(&cp->lock);
-       ret = set_sctp_state(pp, cp, direction, skb);
+       ret = set_sctp_state(pd, cp, direction, skb);
        spin_unlock(&cp->lock);
 
        return ret;
 }
 
-/*
- *      Hash table for SCTP application incarnations
- */
-#define SCTP_APP_TAB_BITS        4
-#define SCTP_APP_TAB_SIZE        (1 << SCTP_APP_TAB_BITS)
-#define SCTP_APP_TAB_MASK        (SCTP_APP_TAB_SIZE - 1)
-
-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(sctp_app_lock);
-
 static inline __u16 sctp_app_hashkey(__be16 port)
 {
        return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
                & SCTP_APP_TAB_MASK;
 }
 
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
 {
        struct ip_vs_app *i;
        __u16 hash;
        __be16 port = inc->port;
        int ret = 0;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
 
        hash = sctp_app_hashkey(port);
 
-       spin_lock_bh(&sctp_app_lock);
-       list_for_each_entry(i, &sctp_apps[hash], p_list) {
+       spin_lock_bh(&ipvs->sctp_app_lock);
+       list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
                if (i->port == port) {
                        ret = -EEXIST;
                        goto out;
                }
        }
-       list_add(&inc->p_list, &sctp_apps[hash]);
-       atomic_inc(&ip_vs_protocol_sctp.appcnt);
+       list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
+       atomic_inc(&pd->appcnt);
 out:
-       spin_unlock_bh(&sctp_app_lock);
+       spin_unlock_bh(&ipvs->sctp_app_lock);
 
        return ret;
 }
 
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-       spin_lock_bh(&sctp_app_lock);
-       atomic_dec(&ip_vs_protocol_sctp.appcnt);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+
+       spin_lock_bh(&ipvs->sctp_app_lock);
+       atomic_dec(&pd->appcnt);
        list_del(&inc->p_list);
-       spin_unlock_bh(&sctp_app_lock);
+       spin_unlock_bh(&ipvs->sctp_app_lock);
 }
 
 static int sctp_app_conn_bind(struct ip_vs_conn *cp)
 {
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
        int hash;
        struct ip_vs_app *inc;
        int result = 0;
@@ -1074,12 +1066,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
        /* Lookup application incarnations and bind the right one */
        hash = sctp_app_hashkey(cp->vport);
 
-       spin_lock(&sctp_app_lock);
-       list_for_each_entry(inc, &sctp_apps[hash], p_list) {
+       spin_lock(&ipvs->sctp_app_lock);
+       list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {
                if (inc->port == cp->vport) {
                        if (unlikely(!ip_vs_app_inc_get(inc)))
                                break;
-                       spin_unlock(&sctp_app_lock);
+                       spin_unlock(&ipvs->sctp_app_lock);
 
                        IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
                                        "%s:%u to app %s on port %u\n",
@@ -1095,43 +1087,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
                        goto out;
                }
        }
-       spin_unlock(&sctp_app_lock);
+       spin_unlock(&ipvs->sctp_app_lock);
 out:
        return result;
 }
 
-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-       IP_VS_INIT_HASH_TABLE(sctp_apps);
-       pp->timeout_table = sctp_timeouts;
-}
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
+       ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
+       spin_lock_init(&ipvs->tcp_app_lock);
+       pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
+                                                       sizeof(sctp_timeouts));
+}
 
-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
-
+       kfree(pd->timeout_table);
 }
 
 struct ip_vs_protocol ip_vs_protocol_sctp = {
-       .name = "SCTP",
-       .protocol = IPPROTO_SCTP,
-       .num_states = IP_VS_SCTP_S_LAST,
-       .dont_defrag = 0,
-       .appcnt = ATOMIC_INIT(0),
-       .init = ip_vs_sctp_init,
-       .exit = ip_vs_sctp_exit,
-       .register_app = sctp_register_app,
+       .name           = "SCTP",
+       .protocol       = IPPROTO_SCTP,
+       .num_states     = IP_VS_SCTP_S_LAST,
+       .dont_defrag    = 0,
+       .init           = NULL,
+       .exit           = NULL,
+       .init_netns     = __ip_vs_sctp_init,
+       .exit_netns     = __ip_vs_sctp_exit,
+       .register_app   = sctp_register_app,
        .unregister_app = sctp_unregister_app,
-       .conn_schedule = sctp_conn_schedule,
-       .conn_in_get = ip_vs_conn_in_get_proto,
-       .conn_out_get = ip_vs_conn_out_get_proto,
-       .snat_handler = sctp_snat_handler,
-       .dnat_handler = sctp_dnat_handler,
-       .csum_check = sctp_csum_check,
-       .state_name = sctp_state_name,
+       .conn_schedule  = sctp_conn_schedule,
+       .conn_in_get    = ip_vs_conn_in_get_proto,
+       .conn_out_get   = ip_vs_conn_out_get_proto,
+       .snat_handler   = sctp_snat_handler,
+       .dnat_handler   = sctp_dnat_handler,
+       .csum_check     = sctp_csum_check,
+       .state_name     = sctp_state_name,
        .state_transition = sctp_state_transition,
-       .app_conn_bind = sctp_app_conn_bind,
-       .debug_packet = ip_vs_tcpudp_debug_packet,
-       .timeout_change = sctp_timeout_change,
-       .set_state_timeout = sctp_set_state_timeout,
+       .app_conn_bind  = sctp_app_conn_bind,
+       .debug_packet   = ip_vs_tcpudp_debug_packet,
+       .timeout_change = NULL,
 };
index f6c5200..c0cc341 100644 (file)
@@ -9,8 +9,12 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
  *
+ *              Network name space (netns) aware.
+ *              Global data moved to netns i.e struct netns_ipvs
+ *              tcp_timeouts table has copy per netns in a hash table per
+ *              protocol ip_vs_proto_data and is handled by netns
  */
 
 #define KMSG_COMPONENT "IPVS"
 #include <net/ip_vs.h>
 
 static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                  int *verdict, struct ip_vs_conn **cpp)
 {
+       struct net *net;
        struct ip_vs_service *svc;
        struct tcphdr _tcph, *th;
        struct ip_vs_iphdr iph;
@@ -42,14 +47,14 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                *verdict = NF_DROP;
                return 0;
        }
-
+       net = skb_net(skb);
        /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
        if (th->syn &&
-           (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
-                                    th->dest))) {
+           (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+                                    &iph.daddr, th->dest))) {
                int ignored;
 
-               if (ip_vs_todrop()) {
+               if (ip_vs_todrop(net_ipvs(net))) {
                        /*
                         * It seems that we are very loaded.
                         * We have to drop this packet :(
@@ -63,13 +68,19 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 * Let the virtual server select a real server for the
                 * incoming connection, and create a connection entry.
                 */
-               *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pd);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                        return 0;
                }
                ip_vs_service_put(svc);
        }
+       /* NF_ACCEPT */
        return 1;
 }
 
@@ -338,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
 /*
  *     Timeout table[state]
  */
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
        [IP_VS_TCP_S_NONE]              =       2*HZ,
        [IP_VS_TCP_S_ESTABLISHED]       =       15*60*HZ,
        [IP_VS_TCP_S_SYN_SENT]          =       2*60*HZ,
@@ -437,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
 };
 
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
 {
        int on = (flags & 1);           /* secure_tcp */
 
@@ -450,14 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
        ** for most if not for all of the applications. Something
        ** like "capabilities" (flags) for each object.
        */
-       tcp_state_table = (on? tcp_states_dos : tcp_states);
-}
-
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-       return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
-                                      tcp_state_name_table, sname, to);
+       pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
 }
 
 static inline int tcp_state_idx(struct tcphdr *th)
@@ -474,7 +475,7 @@ static inline int tcp_state_idx(struct tcphdr *th)
 }
 
 static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
              int direction, struct tcphdr *th)
 {
        int state_idx;
@@ -497,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
                goto tcp_state_out;
        }
 
-       new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+       new_state =
+               pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
 
   tcp_state_out:
        if (new_state != cp->state) {
@@ -505,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
 
                IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
                              "%s:%d state: %s->%s conn->refcnt:%d\n",
-                             pp->name,
+                             pd->pp->name,
                              ((state_off == TCP_DIR_OUTPUT) ?
                               "output " : "input "),
                              th->syn ? 'S' : '.',
@@ -535,17 +537,19 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
                }
        }
 
-       cp->timeout = pp->timeout_table[cp->state = new_state];
+       if (likely(pd))
+               cp->timeout = pd->timeout_table[cp->state = new_state];
+       else    /* What to do ? */
+               cp->timeout = tcp_timeouts[cp->state = new_state];
 }
 
-
 /*
  *     Handle state transitions
  */
 static int
 tcp_state_transition(struct ip_vs_conn *cp, int direction,
                     const struct sk_buff *skb,
-                    struct ip_vs_protocol *pp)
+                    struct ip_vs_proto_data *pd)
 {
        struct tcphdr _tcph, *th;
 
@@ -560,23 +564,12 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
                return 0;
 
        spin_lock(&cp->lock);
-       set_tcp_state(pp, cp, direction, th);
+       set_tcp_state(pd, cp, direction, th);
        spin_unlock(&cp->lock);
 
        return 1;
 }
 
-
-/*
- *     Hash table for TCP application incarnations
- */
-#define        TCP_APP_TAB_BITS        4
-#define        TCP_APP_TAB_SIZE        (1 << TCP_APP_TAB_BITS)
-#define        TCP_APP_TAB_MASK        (TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
 static inline __u16 tcp_app_hashkey(__be16 port)
 {
        return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -584,44 +577,50 @@ static inline __u16 tcp_app_hashkey(__be16 port)
 }
 
 
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
 {
        struct ip_vs_app *i;
        __u16 hash;
        __be16 port = inc->port;
        int ret = 0;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
 
        hash = tcp_app_hashkey(port);
 
-       spin_lock_bh(&tcp_app_lock);
-       list_for_each_entry(i, &tcp_apps[hash], p_list) {
+       spin_lock_bh(&ipvs->tcp_app_lock);
+       list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
                if (i->port == port) {
                        ret = -EEXIST;
                        goto out;
                }
        }
-       list_add(&inc->p_list, &tcp_apps[hash]);
-       atomic_inc(&ip_vs_protocol_tcp.appcnt);
+       list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+       atomic_inc(&pd->appcnt);
 
   out:
-       spin_unlock_bh(&tcp_app_lock);
+       spin_unlock_bh(&ipvs->tcp_app_lock);
        return ret;
 }
 
 
 static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-       spin_lock_bh(&tcp_app_lock);
-       atomic_dec(&ip_vs_protocol_tcp.appcnt);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
+       spin_lock_bh(&ipvs->tcp_app_lock);
+       atomic_dec(&pd->appcnt);
        list_del(&inc->p_list);
-       spin_unlock_bh(&tcp_app_lock);
+       spin_unlock_bh(&ipvs->tcp_app_lock);
 }
 
 
 static int
 tcp_app_conn_bind(struct ip_vs_conn *cp)
 {
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
        int hash;
        struct ip_vs_app *inc;
        int result = 0;
@@ -633,12 +632,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
        /* Lookup application incarnations and bind the right one */
        hash = tcp_app_hashkey(cp->vport);
 
-       spin_lock(&tcp_app_lock);
-       list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+       spin_lock(&ipvs->tcp_app_lock);
+       list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
                if (inc->port == cp->vport) {
                        if (unlikely(!ip_vs_app_inc_get(inc)))
                                break;
-                       spin_unlock(&tcp_app_lock);
+                       spin_unlock(&ipvs->tcp_app_lock);
 
                        IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
                                      "%s:%u to app %s on port %u\n",
@@ -655,7 +654,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
                        goto out;
                }
        }
-       spin_unlock(&tcp_app_lock);
+       spin_unlock(&ipvs->tcp_app_lock);
 
   out:
        return result;
@@ -665,24 +664,35 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
 /*
  *     Set LISTEN timeout. (ip_vs_conn_put will setup timer)
  */
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
 {
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
        spin_lock(&cp->lock);
        cp->state = IP_VS_TCP_S_LISTEN;
-       cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+       cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+                          : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
        spin_unlock(&cp->lock);
 }
 
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ *   timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-       IP_VS_INIT_HASH_TABLE(tcp_apps);
-       pp->timeout_table = tcp_timeouts;
-}
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
+       ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+       spin_lock_init(&ipvs->tcp_app_lock);
+       pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+                                                       sizeof(tcp_timeouts));
+       pd->tcp_state_table =  tcp_states;
+}
 
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+       kfree(pd->timeout_table);
 }
 
 
@@ -691,9 +701,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
        .protocol =             IPPROTO_TCP,
        .num_states =           IP_VS_TCP_S_LAST,
        .dont_defrag =          0,
-       .appcnt =               ATOMIC_INIT(0),
-       .init =                 ip_vs_tcp_init,
-       .exit =                 ip_vs_tcp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
+       .init_netns =           __ip_vs_tcp_init,
+       .exit_netns =           __ip_vs_tcp_exit,
        .register_app =         tcp_register_app,
        .unregister_app =       tcp_unregister_app,
        .conn_schedule =        tcp_conn_schedule,
@@ -707,5 +718,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
        .app_conn_bind =        tcp_app_conn_bind,
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        .timeout_change =       tcp_timeout_change,
-       .set_state_timeout =    tcp_set_state_timeout,
 };
index 9d106a0..f1282cb 100644 (file)
@@ -9,7 +9,8 @@
  *              as published by the Free Software Foundation; either version
  *              2 of the License, or (at your option) any later version.
  *
- * Changes:
+ * Changes:     Hans Schillstrom <hans.schillstrom@ericsson.com>
+ *              Network name space (netns) aware.
  *
  */
 
 #include <net/ip6_checksum.h>
 
 static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
                  int *verdict, struct ip_vs_conn **cpp)
 {
+       struct net *net;
        struct ip_vs_service *svc;
        struct udphdr _udph, *uh;
        struct ip_vs_iphdr iph;
@@ -42,13 +44,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                *verdict = NF_DROP;
                return 0;
        }
-
-       svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+       net = skb_net(skb);
+       svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
                                &iph.daddr, uh->dest);
        if (svc) {
                int ignored;
 
-               if (ip_vs_todrop()) {
+               if (ip_vs_todrop(net_ipvs(net))) {
                        /*
                         * It seems that we are very loaded.
                         * We have to drop this packet :(
@@ -62,13 +64,19 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
                 * Let the virtual server select a real server for the
                 * incoming connection, and create a connection entry.
                 */
-               *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
-               if (!*cpp && !ignored) {
-                       *verdict = ip_vs_leave(svc, skb, pp);
+               *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
+               if (!*cpp && ignored <= 0) {
+                       if (!ignored)
+                               *verdict = ip_vs_leave(svc, skb, pd);
+                       else {
+                               ip_vs_service_put(svc);
+                               *verdict = NF_DROP;
+                       }
                        return 0;
                }
                ip_vs_service_put(svc);
        }
+       /* NF_ACCEPT */
        return 1;
 }
 
@@ -338,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
        return 1;
 }
 
-
-/*
- *     Note: the caller guarantees that only one of register_app,
- *     unregister_app or app_conn_bind is called each time.
- */
-
-#define        UDP_APP_TAB_BITS        4
-#define        UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
-#define        UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
 static inline __u16 udp_app_hashkey(__be16 port)
 {
        return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -358,44 +353,50 @@ static inline __u16 udp_app_hashkey(__be16 port)
 }
 
 
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
 {
        struct ip_vs_app *i;
        __u16 hash;
        __be16 port = inc->port;
        int ret = 0;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
 
        hash = udp_app_hashkey(port);
 
 
-       spin_lock_bh(&udp_app_lock);
-       list_for_each_entry(i, &udp_apps[hash], p_list) {
+       spin_lock_bh(&ipvs->udp_app_lock);
+       list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
                if (i->port == port) {
                        ret = -EEXIST;
                        goto out;
                }
        }
-       list_add(&inc->p_list, &udp_apps[hash]);
-       atomic_inc(&ip_vs_protocol_udp.appcnt);
+       list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+       atomic_inc(&pd->appcnt);
 
   out:
-       spin_unlock_bh(&udp_app_lock);
+       spin_unlock_bh(&ipvs->udp_app_lock);
        return ret;
 }
 
 
 static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
 {
-       spin_lock_bh(&udp_app_lock);
-       atomic_dec(&ip_vs_protocol_udp.appcnt);
+       struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       spin_lock_bh(&ipvs->udp_app_lock);
+       atomic_dec(&pd->appcnt);
        list_del(&inc->p_list);
-       spin_unlock_bh(&udp_app_lock);
+       spin_unlock_bh(&ipvs->udp_app_lock);
 }
 
 
 static int udp_app_conn_bind(struct ip_vs_conn *cp)
 {
+       struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
        int hash;
        struct ip_vs_app *inc;
        int result = 0;
@@ -407,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
        /* Lookup application incarnations and bind the right one */
        hash = udp_app_hashkey(cp->vport);
 
-       spin_lock(&udp_app_lock);
-       list_for_each_entry(inc, &udp_apps[hash], p_list) {
+       spin_lock(&ipvs->udp_app_lock);
+       list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
                if (inc->port == cp->vport) {
                        if (unlikely(!ip_vs_app_inc_get(inc)))
                                break;
-                       spin_unlock(&udp_app_lock);
+                       spin_unlock(&ipvs->udp_app_lock);
 
                        IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
                                      "%s:%u to app %s on port %u\n",
@@ -429,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
                        goto out;
                }
        }
-       spin_unlock(&udp_app_lock);
+       spin_unlock(&ipvs->udp_app_lock);
 
   out:
        return result;
 }
 
 
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
        [IP_VS_UDP_S_LAST]              =       2*HZ,
 };
@@ -446,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
        [IP_VS_UDP_S_LAST]              =       "BUG!",
 };
 
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
-       return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
-                                      udp_state_name_table, sname, to);
-}
-
 static const char * udp_state_name(int state)
 {
        if (state >= IP_VS_UDP_S_LAST)
@@ -464,20 +457,30 @@ static const char * udp_state_name(int state)
 static int
 udp_state_transition(struct ip_vs_conn *cp, int direction,
                     const struct sk_buff *skb,
-                    struct ip_vs_protocol *pp)
+                    struct ip_vs_proto_data *pd)
 {
-       cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+       if (unlikely(!pd)) {
+               pr_err("UDP no ns data\n");
+               return 0;
+       }
+
+       cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
        return 1;
 }
 
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
 {
-       IP_VS_INIT_HASH_TABLE(udp_apps);
-       pp->timeout_table = udp_timeouts;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+       spin_lock_init(&ipvs->udp_app_lock);
+       pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+                                                       sizeof(udp_timeouts));
 }
 
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
 {
+       kfree(pd->timeout_table);
 }
 
 
@@ -486,8 +489,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
        .protocol =             IPPROTO_UDP,
        .num_states =           IP_VS_UDP_S_LAST,
        .dont_defrag =          0,
-       .init =                 udp_init,
-       .exit =                 udp_exit,
+       .init =                 NULL,
+       .exit =                 NULL,
+       .init_netns =           __udp_init,
+       .exit_netns =           __udp_exit,
        .conn_schedule =        udp_conn_schedule,
        .conn_in_get =          ip_vs_conn_in_get_proto,
        .conn_out_get =         ip_vs_conn_out_get_proto,
@@ -501,5 +506,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
        .app_conn_bind =        udp_app_conn_bind,
        .debug_packet =         ip_vs_tcpudp_debug_packet,
        .timeout_change =       NULL,
-       .set_state_timeout =    udp_set_state_timeout,
 };
index ab85aed..d1adf98 100644 (file)
@@ -5,6 +5,18 @@
  *              high-performance and highly available server based on a
  *              cluster of servers.
  *
+ * Version 1,   is capable of handling both version 0 and 1 messages.
+ *              Version 0 is the plain old format.
+ *              Note Version 0 receivers will just drop Ver 1 messages.
+ *              Version 1 is capable of handle IPv6, Persistence data,
+ *              time-outs, and firewall marks.
+ *              In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
+ *              Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions  Message: is a complete datagram
+ *              Sync_conn: is a part of a Message
+ *              Param Data is an option to a Sync_conn.
+ *
  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
  *
  * ip_vs_sync:  sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
  *     Alexandre Cassen        :       Added SyncID support for incoming sync
  *                                     messages filtering.
  *     Justin Ossevoort        :       Fix endian problem on sync message size.
+ *     Hans Schillstrom        :       Added Version 1: i.e. IPv6,
+ *                                     Persistence support, fwmark and time-out.
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -35,6 +49,8 @@
 #include <linux/wait.h>
 #include <linux/kernel.h>
 
+#include <asm/unaligned.h>             /* Used for ntoh_seq and hton_seq */
+
 #include <net/ip.h>
 #include <net/sock.h>
 
 #define IP_VS_SYNC_GROUP 0xe0000051    /* multicast addr - 224.0.0.81 */
 #define IP_VS_SYNC_PORT  8848          /* multicast port */
 
+#define SYNC_PROTO_VER  1              /* Protocol version in header */
 
 /*
  *     IPVS sync connection entry
+ *     Version 0, i.e. original version.
  */
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
        __u8                    reserved;
 
        /* Protocol, addresses and port numbers */
@@ -71,41 +89,159 @@ struct ip_vs_sync_conn_options {
        struct ip_vs_seq        out_seq;        /* outgoing seq. struct */
 };
 
+/*
+     Sync Connection format (sync_conn)
+
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |    Type       |    Protocol   | Ver.  |        Size           |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             Flags                             |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            State              |         cport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |            vport              |         dport                 |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             fwmark                            |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                             timeout  (in sec.)                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                              ...                              |
+      |                        IP-Addresses  (v4 or v6)               |
+      |                              ...                              |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+  Optional Parameters.
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      | Param. Type    | Param. Length |   Param. data                |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
+      |                              ...                              |
+      |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                               | Param Type    | Param. Length |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                           Param  data                         |
+      |         Last Param data should be padded for 32 bit alignment |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ *  Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+       __u8                    type;
+       __u8                    protocol;       /* Which protocol (TCP/UDP) */
+       __be16                  ver_size;       /* Version msb 4 bits */
+       /* Flags and state transition */
+       __be32                  flags;          /* status flags */
+       __be16                  state;          /* state info   */
+       /* Protocol, addresses and port numbers */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __be32                  fwmark;         /* Firewall mark from skb */
+       __be32                  timeout;        /* cp timeout */
+       __be32                  caddr;          /* client address */
+       __be32                  vaddr;          /* virtual address */
+       __be32                  daddr;          /* destination address */
+       /* The sequence options start here */
+       /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+       __u8                    type;
+       __u8                    protocol;       /* Which protocol (TCP/UDP) */
+       __be16                  ver_size;       /* Version msb 4 bits */
+       /* Flags and state transition */
+       __be32                  flags;          /* status flags */
+       __be16                  state;          /* state info   */
+       /* Protocol, addresses and port numbers */
+       __be16                  cport;
+       __be16                  vport;
+       __be16                  dport;
+       __be32                  fwmark;         /* Firewall mark from skb */
+       __be32                  timeout;        /* cp timeout */
+       struct in6_addr         caddr;          /* client address */
+       struct in6_addr         vaddr;          /* virtual address */
+       struct in6_addr         daddr;          /* destination address */
+       /* The sequence options start here */
+       /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+       struct ip_vs_sync_v4    v4;
+       struct ip_vs_sync_v6    v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6            0
+#define STYPE_F_INET6          (1 << STYPE_INET6)
+
+#define SVER_SHIFT             12              /* Shift to get version */
+#define SVER_MASK              0x0fff          /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA      1
+#define IPVS_OPT_PE_DATA       2
+#define IPVS_OPT_PE_NAME       3
+#define IPVS_OPT_PARAM         7
+
+#define IPVS_OPT_F_SEQ_DATA    (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA     (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME     (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM       (1 << (IPVS_OPT_PARAM-1))
+
 struct ip_vs_sync_thread_data {
+       struct net *net;
        struct socket *sock;
        char *buf;
 };
 
-#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE  (sizeof(struct ip_vs_sync_conn_v0))
 #define FULL_CONN_SIZE  \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
 
 
 /*
-  The master mulitcasts messages to the backup load balancers in the
-  following format.
+  The master mulitcasts messages (Datagrams) to the backup load balancers
+  in the following format.
+
+ Version 1:
+  Note, first byte should be Zero, so ver 0 receivers will drop the packet.
 
        0                   1                   2                   3
        0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-      |  Count Conns  |    SyncID     |            Size               |
+      |      0        |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    Version    |    Reserved, set to Zero      |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (1)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                            .                                  |
-      |                            .                                  |
+      ~                            .                                  ~
       |                            .                                  |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
       |                                                               |
       |                    IPVS Sync Connection (n)                   |
       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+       0                   1                   2                   3
+       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |  Count Conns  |    SyncID     |            Size               |
+      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+      |                    IPVS Sync Connection (1)                   |
 */
 
 #define SYNC_MESG_HEADER_LEN   4
 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
 
-struct ip_vs_sync_mesg {
+/* Version 0 header */
+struct ip_vs_sync_mesg_v0 {
        __u8                    nr_conns;
        __u8                    syncid;
        __u16                   size;
@@ -113,9 +249,16 @@ struct ip_vs_sync_mesg {
        /* ip_vs_sync_conn entries start here */
 };
 
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
+/* Version 1 header */
+struct ip_vs_sync_mesg {
+       __u8                    reserved;       /* must be zero */
+       __u8                    syncid;
+       __u16                   size;
+       __u8                    nr_conns;
+       __s8                    version;        /* SYNC_PROTO_VER  */
+       __u16                   spare;
+       /* ip_vs_sync_conn entries start here */
+};
 
 struct ip_vs_sync_buff {
        struct list_head        list;
@@ -127,28 +270,6 @@ struct ip_vs_sync_buff {
        unsigned char           *end;
 };
 
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff   *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
 /* multicast addr */
 static struct sockaddr_in mcast_addr = {
        .sin_family             = AF_INET,
@@ -156,41 +277,71 @@ static struct sockaddr_in mcast_addr = {
        .sin_addr.s_addr        = cpu_to_be32(IP_VS_SYNC_GROUP),
 };
 
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+       ho->init_seq       = get_unaligned_be32(&no->init_seq);
+       ho->delta          = get_unaligned_be32(&no->delta);
+       ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
+
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+       put_unaligned_be32(ho->init_seq, &no->init_seq);
+       put_unaligned_be32(ho->delta, &no->delta);
+       put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
 
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
 {
        struct ip_vs_sync_buff *sb;
 
-       spin_lock_bh(&ip_vs_sync_lock);
-       if (list_empty(&ip_vs_sync_queue)) {
+       spin_lock_bh(&ipvs->sync_lock);
+       if (list_empty(&ipvs->sync_queue)) {
                sb = NULL;
        } else {
-               sb = list_entry(ip_vs_sync_queue.next,
+               sb = list_entry(ipvs->sync_queue.next,
                                struct ip_vs_sync_buff,
                                list);
                list_del(&sb->list);
        }
-       spin_unlock_bh(&ip_vs_sync_lock);
+       spin_unlock_bh(&ipvs->sync_lock);
 
        return sb;
 }
 
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
 {
        struct ip_vs_sync_buff *sb;
 
        if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
                return NULL;
 
-       if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+       sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+       if (!sb->mesg) {
                kfree(sb);
                return NULL;
        }
+       sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zeo now */
+       sb->mesg->version = SYNC_PROTO_VER;
+       sb->mesg->syncid = ipvs->master_syncid;
+       sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
        sb->mesg->nr_conns = 0;
-       sb->mesg->syncid = ip_vs_master_syncid;
-       sb->mesg->size = 4;
-       sb->head = (unsigned char *)sb->mesg + 4;
-       sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+       sb->mesg->spare = 0;
+       sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
+       sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
+
        sb->firstuse = jiffies;
        return sb;
 }
@@ -201,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
        kfree(sb);
 }
 
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
 {
-       spin_lock(&ip_vs_sync_lock);
-       if (ip_vs_sync_state & IP_VS_STATE_MASTER)
-               list_add_tail(&sb->list, &ip_vs_sync_queue);
+       struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+       spin_lock(&ipvs->sync_lock);
+       if (ipvs->sync_state & IP_VS_STATE_MASTER)
+               list_add_tail(&sb->list, &ipvs->sync_queue);
        else
                ip_vs_sync_buff_release(sb);
-       spin_unlock(&ip_vs_sync_lock);
+       spin_unlock(&ipvs->sync_lock);
 }
 
 /*
@@ -216,36 +369,101 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
  *     than the specified time or the specified time is zero.
  */
 static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
 {
        struct ip_vs_sync_buff *sb;
 
-       spin_lock_bh(&curr_sb_lock);
-       if (curr_sb && (time == 0 ||
-                       time_before(jiffies - curr_sb->firstuse, time))) {
-               sb = curr_sb;
-               curr_sb = NULL;
+       spin_lock_bh(&ipvs->sync_buff_lock);
+       if (ipvs->sync_buff && (time == 0 ||
+           time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+               sb = ipvs->sync_buff;
+               ipvs->sync_buff = NULL;
        } else
                sb = NULL;
-       spin_unlock_bh(&curr_sb_lock);
+       spin_unlock_bh(&ipvs->sync_buff_lock);
        return sb;
 }
 
+/*
+ * Switch mode from sending version 0 or 1
+ *  - must handle sync_buf
+ */
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       if (!ipvs->sync_state & IP_VS_STATE_MASTER)
+               return;
+       if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
+               return;
+
+       spin_lock_bh(&ipvs->sync_buff_lock);
+       /* Buffer empty ? then let buf_create do the job  */
+       if (ipvs->sync_buff->mesg->size <=  sizeof(struct ip_vs_sync_mesg)) {
+               kfree(ipvs->sync_buff);
+               ipvs->sync_buff = NULL;
+       } else {
+               spin_lock_bh(&ipvs->sync_lock);
+               if (ipvs->sync_state & IP_VS_STATE_MASTER)
+                       list_add_tail(&ipvs->sync_buff->list,
+                                     &ipvs->sync_queue);
+               else
+                       ip_vs_sync_buff_release(ipvs->sync_buff);
+               spin_unlock_bh(&ipvs->sync_lock);
+       }
+       spin_unlock_bh(&ipvs->sync_buff_lock);
+}
 
 /*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
+{
+       struct ip_vs_sync_buff *sb;
+       struct ip_vs_sync_mesg_v0 *mesg;
+
+       if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+               return NULL;
+
+       sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+       if (!sb->mesg) {
+               kfree(sb);
+               return NULL;
+       }
+       mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+       mesg->nr_conns = 0;
+       mesg->syncid = ipvs->master_syncid;
+       mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+       sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+       sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
+       sb->firstuse = jiffies;
+       return sb;
+}
+
+/*
+ *      Version 0 , could be switched in by sys_ctl.
  *      Add an ip_vs_conn information into the current sync_buff.
- *      Called by ip_vs_in.
  */
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
 {
-       struct ip_vs_sync_mesg *m;
-       struct ip_vs_sync_conn *s;
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_sync_mesg_v0 *m;
+       struct ip_vs_sync_conn_v0 *s;
        int len;
 
-       spin_lock(&curr_sb_lock);
-       if (!curr_sb) {
-               if (!(curr_sb=ip_vs_sync_buff_create())) {
-                       spin_unlock(&curr_sb_lock);
+       if (unlikely(cp->af != AF_INET))
+               return;
+       /* Do not sync ONE PACKET */
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               return;
+
+       spin_lock(&ipvs->sync_buff_lock);
+       if (!ipvs->sync_buff) {
+               ipvs->sync_buff =
+                       ip_vs_sync_buff_create_v0(ipvs);
+               if (!ipvs->sync_buff) {
+                       spin_unlock(&ipvs->sync_buff_lock);
                        pr_err("ip_vs_sync_buff_create failed.\n");
                        return;
                }
@@ -253,10 +471,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
        len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
                SIMPLE_CONN_SIZE;
-       m = curr_sb->mesg;
-       s = (struct ip_vs_sync_conn *)curr_sb->head;
+       m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+       s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
 
        /* copy members */
+       s->reserved = 0;
        s->protocol = cp->protocol;
        s->cport = cp->cport;
        s->vport = cp->vport;
@@ -274,83 +493,366 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 
        m->nr_conns++;
        m->size += len;
-       curr_sb->head += len;
+       ipvs->sync_buff->head += len;
 
        /* check if there is a space for next one */
-       if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
-               sb_queue_tail(curr_sb);
-               curr_sb = NULL;
+       if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+               sb_queue_tail(ipvs);
+               ipvs->sync_buff = NULL;
        }
-       spin_unlock(&curr_sb_lock);
+       spin_unlock(&ipvs->sync_buff_lock);
 
        /* synchronize its controller if it has */
        if (cp->control)
-               ip_vs_sync_conn(cp->control);
+               ip_vs_sync_conn(net, cp->control);
+}
+
+/*
+ *      Add an ip_vs_conn information into the current sync_buff.
+ *      Called by ip_vs_in.
+ *      Sending Version 1 messages
+ */
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_sync_mesg *m;
+       union ip_vs_sync_conn *s;
+       __u8 *p;
+       unsigned int len, pe_name_len, pad;
+
+       /* Handle old version of the protocol */
+       if (ipvs->sysctl_sync_ver == 0) {
+               ip_vs_sync_conn_v0(net, cp);
+               return;
+       }
+       /* Do not sync ONE PACKET */
+       if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+               goto control;
+sloop:
+       /* Sanity checks */
+       pe_name_len = 0;
+       if (cp->pe_data_len) {
+               if (!cp->pe_data || !cp->dest) {
+                       IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+                       return;
+               }
+               pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+       }
+
+       spin_lock(&ipvs->sync_buff_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6)
+               len = sizeof(struct ip_vs_sync_v6);
+       else
+#endif
+               len = sizeof(struct ip_vs_sync_v4);
+
+       if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+               len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+       if (cp->pe_data_len)
+               len += cp->pe_data_len + 2;     /* + Param hdr field */
+       if (pe_name_len)
+               len += pe_name_len + 2;
+
+       /* check if there is a space for this one  */
+       pad = 0;
+       if (ipvs->sync_buff) {
+               pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+               if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+                       sb_queue_tail(ipvs);
+                       ipvs->sync_buff = NULL;
+                       pad = 0;
+               }
+       }
+
+       if (!ipvs->sync_buff) {
+               ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+               if (!ipvs->sync_buff) {
+                       spin_unlock(&ipvs->sync_buff_lock);
+                       pr_err("ip_vs_sync_buff_create failed.\n");
+                       return;
+               }
+       }
+
+       m = ipvs->sync_buff->mesg;
+       p = ipvs->sync_buff->head;
+       ipvs->sync_buff->head += pad + len;
+       m->size += pad + len;
+       /* Add ev. padding from prev. sync_conn */
+       while (pad--)
+               *(p++) = 0;
+
+       s = (union ip_vs_sync_conn *)p;
+
+       /* Set message type  & copy members */
+       s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+       s->v4.ver_size = htons(len & SVER_MASK);        /* Version 0 */
+       s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+       s->v4.state = htons(cp->state);
+       s->v4.protocol = cp->protocol;
+       s->v4.cport = cp->cport;
+       s->v4.vport = cp->vport;
+       s->v4.dport = cp->dport;
+       s->v4.fwmark = htonl(cp->fwmark);
+       s->v4.timeout = htonl(cp->timeout / HZ);
+       m->nr_conns++;
+
+#ifdef CONFIG_IP_VS_IPV6
+       if (cp->af == AF_INET6) {
+               p += sizeof(struct ip_vs_sync_v6);
+               ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+               ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+               ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+       } else
+#endif
+       {
+               p += sizeof(struct ip_vs_sync_v4);      /* options ptr */
+               s->v4.caddr = cp->caddr.ip;
+               s->v4.vaddr = cp->vaddr.ip;
+               s->v4.daddr = cp->daddr.ip;
+       }
+       if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+               *(p++) = IPVS_OPT_SEQ_DATA;
+               *(p++) = sizeof(struct ip_vs_sync_conn_options);
+               hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+               p += sizeof(struct ip_vs_seq);
+               hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+               p += sizeof(struct ip_vs_seq);
+       }
+       /* Handle pe data */
+       if (cp->pe_data_len && cp->pe_data) {
+               *(p++) = IPVS_OPT_PE_DATA;
+               *(p++) = cp->pe_data_len;
+               memcpy(p, cp->pe_data, cp->pe_data_len);
+               p += cp->pe_data_len;
+               if (pe_name_len) {
+                       /* Add PE_NAME */
+                       *(p++) = IPVS_OPT_PE_NAME;
+                       *(p++) = pe_name_len;
+                       memcpy(p, cp->pe->name, pe_name_len);
+                       p += pe_name_len;
+               }
+       }
+
+       spin_unlock(&ipvs->sync_buff_lock);
+
+control:
+       /* synchronize its controller if it has */
+       cp = cp->control;
+       if (!cp)
+               return;
+       /*
+        * Reduce sync rate for templates
+        * i.e only increment in_pkts for Templates.
+        */
+       if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+               int pkts = atomic_add_return(1, &cp->in_pkts);
+
+               if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
+                       return;
+       }
+       goto sloop;
 }
 
+/*
+ *  fill_param used by version 1
+ */
 static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
-                          const union nf_inet_addr *caddr, __be16 cport,
-                          const union nf_inet_addr *vaddr, __be16 vport,
-                          struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+                          struct ip_vs_conn_param *p,
+                          __u8 *pe_data, unsigned int pe_data_len,
+                          __u8 *pe_name, unsigned int pe_name_len)
 {
-       /* XXX: Need to take into account persistence engine */
-       ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+       if (af == AF_INET6)
+               ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+                                     (const union nf_inet_addr *)&sc->v6.caddr,
+                                     sc->v6.cport,
+                                     (const union nf_inet_addr *)&sc->v6.vaddr,
+                                     sc->v6.vport, p);
+       else
+#endif
+               ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+                                     (const union nf_inet_addr *)&sc->v4.caddr,
+                                     sc->v4.cport,
+                                     (const union nf_inet_addr *)&sc->v4.vaddr,
+                                     sc->v4.vport, p);
+       /* Handle pe data */
+       if (pe_data_len) {
+               if (pe_name_len) {
+                       char buff[IP_VS_PENAME_MAXLEN+1];
+
+                       memcpy(buff, pe_name, pe_name_len);
+                       buff[pe_name_len]=0;
+                       p->pe = __ip_vs_pe_getbyname(buff);
+                       if (!p->pe) {
+                               IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+                                            buff);
+                               return 1;
+                       }
+               } else {
+                       IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+                       return 1;
+               }
+
+               p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+               if (!p->pe_data) {
+                       if (p->pe->module)
+                               module_put(p->pe->module);
+                       return -ENOMEM;
+               }
+               memcpy(p->pe_data, pe_data, pe_data_len);
+               p->pe_data_len = pe_data_len;
+       }
        return 0;
 }
 
 /*
- *      Process received multicast message and create the corresponding
- *      ip_vs_conn entries.
+ *  Connection Add / Update.
+ *  Common for version 0 and 1 reception of backup sync_conns.
+ *  Param: ...
+ *         timeout is in sec.
  */
-static void ip_vs_process_message(const char *buffer, const size_t buflen)
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+                           unsigned int flags, unsigned int state,
+                           unsigned int protocol, unsigned int type,
+                           const union nf_inet_addr *daddr, __be16 dport,
+                           unsigned long timeout, __u32 fwmark,
+                           struct ip_vs_sync_conn_options *opt)
 {
-       struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
-       struct ip_vs_sync_conn *s;
-       struct ip_vs_sync_conn_options *opt;
-       struct ip_vs_conn *cp;
-       struct ip_vs_protocol *pp;
        struct ip_vs_dest *dest;
-       struct ip_vs_conn_param param;
-       char *p;
-       int i;
+       struct ip_vs_conn *cp;
+       struct netns_ipvs *ipvs = net_ipvs(net);
 
-       if (buflen < sizeof(struct ip_vs_sync_mesg)) {
-               IP_VS_ERR_RL("sync message header too short\n");
-               return;
-       }
+       if (!(flags & IP_VS_CONN_F_TEMPLATE))
+               cp = ip_vs_conn_in_get(param);
+       else
+               cp = ip_vs_ct_in_get(param);
 
-       /* Convert size back to host byte order */
-       m->size = ntohs(m->size);
+       if (cp && param->pe_data)       /* Free pe_data */
+               kfree(param->pe_data);
+       if (!cp) {
+               /*
+                * Find the appropriate destination for the connection.
+                * If it is not found the connection will remain unbound
+                * but still handled.
+                */
+               dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
+                                      param->vport, protocol, fwmark);
 
-       if (buflen != m->size) {
-               IP_VS_ERR_RL("bogus sync message size\n");
-               return;
+               /*  Set the approprite ativity flag */
+               if (protocol == IPPROTO_TCP) {
+                       if (state != IP_VS_TCP_S_ESTABLISHED)
+                               flags |= IP_VS_CONN_F_INACTIVE;
+                       else
+                               flags &= ~IP_VS_CONN_F_INACTIVE;
+               } else if (protocol == IPPROTO_SCTP) {
+                       if (state != IP_VS_SCTP_S_ESTABLISHED)
+                               flags |= IP_VS_CONN_F_INACTIVE;
+                       else
+                               flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
+               cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+               if (dest)
+                       atomic_dec(&dest->refcnt);
+               if (!cp) {
+                       if (param->pe_data)
+                               kfree(param->pe_data);
+                       IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+                       return;
+               }
+       } else if (!cp->dest) {
+               dest = ip_vs_try_bind_dest(cp);
+               if (dest)
+                       atomic_dec(&dest->refcnt);
+       } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+               (cp->state != state)) {
+               /* update active/inactive flag for the connection */
+               dest = cp->dest;
+               if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                       (state != IP_VS_TCP_S_ESTABLISHED)) {
+                       atomic_dec(&dest->activeconns);
+                       atomic_inc(&dest->inactconns);
+                       cp->flags |= IP_VS_CONN_F_INACTIVE;
+               } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+                       (state == IP_VS_TCP_S_ESTABLISHED)) {
+                       atomic_inc(&dest->activeconns);
+                       atomic_dec(&dest->inactconns);
+                       cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
+       } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+               (cp->state != state)) {
+               dest = cp->dest;
+               if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+               (state != IP_VS_SCTP_S_ESTABLISHED)) {
+                       atomic_dec(&dest->activeconns);
+                       atomic_inc(&dest->inactconns);
+                       cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+               }
        }
 
-       /* SyncID sanity check */
-       if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
-               IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
-                         m->syncid);
-               return;
+       if (opt)
+               memcpy(&cp->in_seq, opt, sizeof(*opt));
+       atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
+       cp->state = state;
+       cp->old_state = cp->state;
+       /*
+        * For Ver 0 messages style
+        *  - Not possible to recover the right timeout for templates
+        *  - can not find the right fwmark
+        *    virtual service. If needed, we can do it for
+        *    non-fwmark persistent services.
+        * Ver 1 messages style.
+        *  - No problem.
+        */
+       if (timeout) {
+               if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+                       timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+               cp->timeout = timeout*HZ;
+       } else {
+               struct ip_vs_proto_data *pd;
+
+               pd = ip_vs_proto_data_get(net, protocol);
+               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+                       cp->timeout = pd->timeout_table[state];
+               else
+                       cp->timeout = (3*60*HZ);
        }
+       ip_vs_conn_put(cp);
+}
 
-       p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+/*
+ *  Process received multicast message for Version 0
+ */
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+                                    const size_t buflen)
+{
+       struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
+       struct ip_vs_sync_conn_v0 *s;
+       struct ip_vs_sync_conn_options *opt;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_conn_param param;
+       char *p;
+       int i;
+
+       p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
        for (i=0; i<m->nr_conns; i++) {
                unsigned flags, state;
 
                if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
-                       IP_VS_ERR_RL("bogus conn in sync message\n");
+                       IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
                        return;
                }
-               s = (struct ip_vs_sync_conn *) p;
+               s = (struct ip_vs_sync_conn_v0 *) p;
                flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
                flags &= ~IP_VS_CONN_F_HASHED;
                if (flags & IP_VS_CONN_F_SEQ_MASK) {
                        opt = (struct ip_vs_sync_conn_options *)&s[1];
                        p += FULL_CONN_SIZE;
                        if (p > buffer+buflen) {
-                               IP_VS_ERR_RL("bogus conn options in sync message\n");
+                               IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
                                return;
                        }
                } else {
@@ -362,118 +864,286 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
                if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
                        pp = ip_vs_proto_get(s->protocol);
                        if (!pp) {
-                               IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
                                        s->protocol);
                                continue;
                        }
                        if (state >= pp->num_states) {
-                               IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
                                        pp->name, state);
                                continue;
                        }
                } else {
                        /* protocol in templates is not used for state/timeout */
-                       pp = NULL;
                        if (state > 0) {
-                               IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+                               IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
                                        state);
                                state = 0;
                        }
                }
 
-               {
-                       if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
-                                             (union nf_inet_addr *)&s->caddr,
-                                             s->cport,
-                                             (union nf_inet_addr *)&s->vaddr,
-                                             s->vport, &param)) {
-                               pr_err("ip_vs_conn_fill_param_sync failed");
-                               return;
+               ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+                                     (const union nf_inet_addr *)&s->caddr,
+                                     s->cport,
+                                     (const union nf_inet_addr *)&s->vaddr,
+                                     s->vport, &param);
+
+               /* Send timeout as Zero */
+               ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+                               (union nf_inet_addr *)&s->daddr, s->dport,
+                               0, 0, opt);
+       }
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+                                   __u32 *opt_flags,
+                                   struct ip_vs_sync_conn_options *opt)
+{
+       struct ip_vs_sync_conn_options *topt;
+
+       topt = (struct ip_vs_sync_conn_options *)p;
+
+       if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+               IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+               return -EINVAL;
+       }
+       if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+               IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+               return -EINVAL;
+       }
+       ntoh_seq(&topt->in_seq, &opt->in_seq);
+       ntoh_seq(&topt->out_seq, &opt->out_seq);
+       *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+       return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+                         __u8 **data, unsigned int maxlen,
+                         __u32 *opt_flags, __u32 flag)
+{
+       if (plen > maxlen) {
+               IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+               return -EINVAL;
+       }
+       if (*opt_flags & flag) {
+               IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+               return -EINVAL;
+       }
+       *data_len = plen;
+       *data = p;
+       *opt_flags |= flag;
+       return 0;
+}
+/*
+ *   Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+{
+       struct ip_vs_sync_conn_options opt;
+       union  ip_vs_sync_conn *s;
+       struct ip_vs_protocol *pp;
+       struct ip_vs_conn_param param;
+       __u32 flags;
+       unsigned int af, state, pe_data_len=0, pe_name_len=0;
+       __u8 *pe_data=NULL, *pe_name=NULL;
+       __u32 opt_flags=0;
+       int retc=0;
+
+       s = (union ip_vs_sync_conn *) p;
+
+       if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+               af = AF_INET6;
+               p += sizeof(struct ip_vs_sync_v6);
+#else
+               IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+               retc = 10;
+               goto out;
+#endif
+       } else if (!s->v4.type) {
+               af = AF_INET;
+               p += sizeof(struct ip_vs_sync_v4);
+       } else {
+               return -10;
+       }
+       if (p > msg_end)
+               return -20;
+
+       /* Process optional params check Type & Len. */
+       while (p < msg_end) {
+               int ptype;
+               int plen;
+
+               if (p+2 > msg_end)
+                       return -30;
+               ptype = *(p++);
+               plen  = *(p++);
+
+               if (!plen || ((p + plen) > msg_end))
+                       return -40;
+               /* Handle seq option  p = param data */
+               switch (ptype & ~IPVS_OPT_F_PARAM) {
+               case IPVS_OPT_SEQ_DATA:
+                       if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+                               return -50;
+                       break;
+
+               case IPVS_OPT_PE_DATA:
+                       if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+                                          IP_VS_PEDATA_MAXLEN, &opt_flags,
+                                          IPVS_OPT_F_PE_DATA))
+                               return -60;
+                       break;
+
+               case IPVS_OPT_PE_NAME:
+                       if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+                                          IP_VS_PENAME_MAXLEN, &opt_flags,
+                                          IPVS_OPT_F_PE_NAME))
+                               return -70;
+                       break;
+
+               default:
+                       /* Param data mandatory ? */
+                       if (!(ptype & IPVS_OPT_F_PARAM)) {
+                               IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+                                         ptype & ~IPVS_OPT_F_PARAM);
+                               retc = 20;
+                               goto out;
                        }
-                       if (!(flags & IP_VS_CONN_F_TEMPLATE))
-                               cp = ip_vs_conn_in_get(&param);
-                       else
-                               cp = ip_vs_ct_in_get(&param);
                }
-               if (!cp) {
-                       /*
-                        * Find the appropriate destination for the connection.
-                        * If it is not found the connection will remain unbound
-                        * but still handled.
-                        */
-                       dest = ip_vs_find_dest(AF_INET,
-                                              (union nf_inet_addr *)&s->daddr,
-                                              s->dport,
-                                              (union nf_inet_addr *)&s->vaddr,
-                                              s->vport,
-                                              s->protocol);
-                       /*  Set the approprite ativity flag */
-                       if (s->protocol == IPPROTO_TCP) {
-                               if (state != IP_VS_TCP_S_ESTABLISHED)
-                                       flags |= IP_VS_CONN_F_INACTIVE;
-                               else
-                                       flags &= ~IP_VS_CONN_F_INACTIVE;
-                       } else if (s->protocol == IPPROTO_SCTP) {
-                               if (state != IP_VS_SCTP_S_ESTABLISHED)
-                                       flags |= IP_VS_CONN_F_INACTIVE;
-                               else
-                                       flags &= ~IP_VS_CONN_F_INACTIVE;
+               p += plen;  /* Next option */
+       }
+
+       /* Get flags and Mask off unsupported */
+       flags  = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+       flags |= IP_VS_CONN_F_SYNC;
+       state = ntohs(s->v4.state);
+
+       if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+               pp = ip_vs_proto_get(s->v4.protocol);
+               if (!pp) {
+                       IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+                               s->v4.protocol);
+                       retc = 30;
+                       goto out;
+               }
+               if (state >= pp->num_states) {
+                       IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+                               pp->name, state);
+                       retc = 40;
+                       goto out;
+               }
+       } else {
+               /* protocol in templates is not used for state/timeout */
+               if (state > 0) {
+                       IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+                               state);
+                       state = 0;
+               }
+       }
+       if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+                                      pe_data_len, pe_name, pe_name_len)) {
+               retc = 50;
+               goto out;
+       }
+       /* If only IPv4, just silent skip IPv6 */
+       if (af == AF_INET)
+               ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+                               (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+                               ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+                               (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+                               );
+#ifdef CONFIG_IP_VS_IPV6
+       else
+               ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+                               (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+                               ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+                               (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+                               );
+#endif
+       return 0;
+       /* Error exit */
+out:
+       IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+       return retc;
+
+}
+/*
+ *      Process received multicast message and create the corresponding
+ *      ip_vs_conn entries.
+ *      Handles Version 0 & 1
+ */
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+                                 const size_t buflen)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+       struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
+       __u8 *p, *msg_end;
+       int i, nr_conns;
+
+       if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
+               IP_VS_DBG(2, "BACKUP, message header too short\n");
+               return;
+       }
+       /* Convert size back to host byte order */
+       m2->size = ntohs(m2->size);
+
+       if (buflen != m2->size) {
+               IP_VS_DBG(2, "BACKUP, bogus message size\n");
+               return;
+       }
+       /* SyncID sanity check */
+       if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
+               IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+               return;
+       }
+       /* Handle version 1  message */
+       if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+           && (m2->spare == 0)) {
+
+               msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
+               nr_conns = m2->nr_conns;
+
+               for (i=0; i<nr_conns; i++) {
+                       union ip_vs_sync_conn *s;
+                       unsigned size;
+                       int retc;
+
+                       p = msg_end;
+                       if (p + sizeof(s->v4) > buffer+buflen) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+                               return;
                        }
-                       cp = ip_vs_conn_new(&param,
-                                           (union nf_inet_addr *)&s->daddr,
-                                           s->dport, flags, dest);
-                       if (dest)
-                               atomic_dec(&dest->refcnt);
-                       if (!cp) {
-                               pr_err("ip_vs_conn_new failed\n");
+                       s = (union ip_vs_sync_conn *)p;
+                       size = ntohs(s->v4.ver_size) & SVER_MASK;
+                       msg_end = p + size;
+                       /* Basic sanity checks */
+                       if (msg_end  > buffer+buflen) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
                                return;
                        }
-               } else if (!cp->dest) {
-                       dest = ip_vs_try_bind_dest(cp);
-                       if (dest)
-                               atomic_dec(&dest->refcnt);
-               } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
-                          (cp->state != state)) {
-                       /* update active/inactive flag for the connection */
-                       dest = cp->dest;
-                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                               (state != IP_VS_TCP_S_ESTABLISHED)) {
-                               atomic_dec(&dest->activeconns);
-                               atomic_inc(&dest->inactconns);
-                               cp->flags |= IP_VS_CONN_F_INACTIVE;
-                       } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                               (state == IP_VS_TCP_S_ESTABLISHED)) {
-                               atomic_inc(&dest->activeconns);
-                               atomic_dec(&dest->inactconns);
-                               cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+                                             ntohs(s->v4.ver_size) >> SVER_SHIFT);
+                               return;
                        }
-               } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
-                          (cp->state != state)) {
-                       dest = cp->dest;
-                       if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
-                            (state != IP_VS_SCTP_S_ESTABLISHED)) {
-                           atomic_dec(&dest->activeconns);
-                           atomic_inc(&dest->inactconns);
-                           cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+                       /* Process a single sync_conn */
+                       retc = ip_vs_proc_sync_conn(net, p, msg_end);
+                       if (retc < 0) {
+                               IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+                                            retc);
+                               return;
                        }
+                       /* Make sure we have 32 bit alignment */
+                       msg_end = p + ((size + 3) & ~3);
                }
-
-               if (opt)
-                       memcpy(&cp->in_seq, opt, sizeof(*opt));
-               atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
-               cp->state = state;
-               cp->old_state = cp->state;
-               /*
-                * We can not recover the right timeout for templates
-                * in all cases, we can not find the right fwmark
-                * virtual service. If needed, we can do it for
-                * non-fwmark persistent services.
-                */
-               if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
-                       cp->timeout = pp->timeout_table[state];
-               else
-                       cp->timeout = (3*60*HZ);
-               ip_vs_conn_put(cp);
+       } else {
+               /* Old type of message */
+               ip_vs_process_message_v0(net, buffer, buflen);
+               return;
        }
 }
 
@@ -511,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
 {
        struct net_device *dev;
        struct inet_sock *inet = inet_sk(sk);
+       struct net *net = sock_net(sk);
 
-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev)
                return -ENODEV;
 
        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -531,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
  *     Set the maximum length of sync message according to the
  *     specified interface's MTU.
  */
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct net_device *dev;
        int num;
 
        if (sync_state == IP_VS_STATE_MASTER) {
-               if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+               dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+               if (!dev)
                        return -ENODEV;
 
                num = (dev->mtu - sizeof(struct iphdr) -
                       sizeof(struct udphdr) -
                       SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
-               sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+               ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
                        SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
                IP_VS_DBG(7, "setting the maximum length of sync sending "
-                         "message %d.\n", sync_send_mesg_maxlen);
+                         "message %d.\n", ipvs->send_mesg_maxlen);
        } else if (sync_state == IP_VS_STATE_BACKUP) {
-               if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+               dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+               if (!dev)
                        return -ENODEV;
 
-               sync_recv_mesg_maxlen = dev->mtu -
+               ipvs->recv_mesg_maxlen = dev->mtu -
                        sizeof(struct iphdr) - sizeof(struct udphdr);
                IP_VS_DBG(7, "setting the maximum length of sync receiving "
-                         "message %d.\n", sync_recv_mesg_maxlen);
+                         "message %d.\n", ipvs->recv_mesg_maxlen);
        }
 
        return 0;
@@ -569,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
 static int
 join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 {
+       struct net *net = sock_net(sk);
        struct ip_mreqn mreq;
        struct net_device *dev;
        int ret;
@@ -576,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
        memset(&mreq, 0, sizeof(mreq));
        memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
 
-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev)
                return -ENODEV;
        if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
                return -EINVAL;
@@ -593,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
 
 static int bind_mcastif_addr(struct socket *sock, char *ifname)
 {
+       struct net *net = sock_net(sock->sk);
        struct net_device *dev;
        __be32 addr;
        struct sockaddr_in sin;
 
-       if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+       dev = __dev_get_by_name(net, ifname);
+       if (!dev)
                return -ENODEV;
 
        addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -619,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
 /*
  *      Set up sending multicast socket over UDP
  */
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct socket *sock;
        int result;
 
@@ -631,7 +1311,7 @@ static struct socket * make_send_sock(void)
                return ERR_PTR(result);
        }
 
-       result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+       result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
        if (result < 0) {
                pr_err("Error setting outbound mcast interface\n");
                goto error;
@@ -640,7 +1320,7 @@ static struct socket * make_send_sock(void)
        set_mcast_loop(sock->sk, 0);
        set_mcast_ttl(sock->sk, 1);
 
-       result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+       result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
        if (result < 0) {
                pr_err("Error binding address of the mcast interface\n");
                goto error;
@@ -664,8 +1344,9 @@ static struct socket * make_send_sock(void)
 /*
  *      Set up receiving multicast socket over UDP
  */
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
        struct socket *sock;
        int result;
 
@@ -689,7 +1370,7 @@ static struct socket * make_receive_sock(void)
        /* join the multicast group */
        result = join_mcast_group(sock->sk,
                        (struct in_addr *) &mcast_addr.sin_addr,
-                       ip_vs_backup_mcast_ifn);
+                       ipvs->backup_mcast_ifn);
        if (result < 0) {
                pr_err("Error joining to the multicast group\n");
                goto error;
@@ -760,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 static int sync_thread_master(void *data)
 {
        struct ip_vs_sync_thread_data *tinfo = data;
+       struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
        struct ip_vs_sync_buff *sb;
 
        pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
                "syncid = %d\n",
-               ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+               ipvs->master_mcast_ifn, ipvs->master_syncid);
 
        while (!kthread_should_stop()) {
-               while ((sb = sb_dequeue())) {
+               while ((sb = sb_dequeue(ipvs))) {
                        ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
                        ip_vs_sync_buff_release(sb);
                }
 
-               /* check if entries stay in curr_sb for 2 seconds */
-               sb = get_curr_sync_buff(2 * HZ);
+               /* check if entries stay in ipvs->sync_buff for 2 seconds */
+               sb = get_curr_sync_buff(ipvs, 2 * HZ);
                if (sb) {
                        ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
                        ip_vs_sync_buff_release(sb);
@@ -783,14 +1465,13 @@ static int sync_thread_master(void *data)
        }
 
        /* clean up the sync_buff queue */
-       while ((sb=sb_dequeue())) {
+       while ((sb = sb_dequeue(ipvs)))
                ip_vs_sync_buff_release(sb);
-       }
 
        /* clean up the current sync_buff */
-       if ((sb = get_curr_sync_buff(0))) {
+       sb = get_curr_sync_buff(ipvs, 0);
+       if (sb)
                ip_vs_sync_buff_release(sb);
-       }
 
        /* release the sending multicast socket */
        sock_release(tinfo->sock);
@@ -803,11 +1484,12 @@ static int sync_thread_master(void *data)
 static int sync_thread_backup(void *data)
 {
        struct ip_vs_sync_thread_data *tinfo = data;
+       struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
        int len;
 
        pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
                "syncid = %d\n",
-               ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+               ipvs->backup_mcast_ifn, ipvs->backup_syncid);
 
        while (!kthread_should_stop()) {
                wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -817,7 +1499,7 @@ static int sync_thread_backup(void *data)
                /* do we have data now? */
                while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
                        len = ip_vs_receive(tinfo->sock, tinfo->buf,
-                                       sync_recv_mesg_maxlen);
+                                       ipvs->recv_mesg_maxlen);
                        if (len <= 0) {
                                pr_err("receiving message error\n");
                                break;
@@ -826,7 +1508,7 @@ static int sync_thread_backup(void *data)
                        /* disable bottom half, because it accesses the data
                           shared by softirq while getting/creating conns */
                        local_bh_disable();
-                       ip_vs_process_message(tinfo->buf, len);
+                       ip_vs_process_message(tinfo->net, tinfo->buf, len);
                        local_bh_enable();
                }
        }
@@ -840,41 +1522,42 @@ static int sync_thread_backup(void *data)
 }
 
 
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 {
        struct ip_vs_sync_thread_data *tinfo;
        struct task_struct **realtask, *task;
        struct socket *sock;
+       struct netns_ipvs *ipvs = net_ipvs(net);
        char *name, *buf = NULL;
        int (*threadfn)(void *data);
        int result = -ENOMEM;
 
        IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
        IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
-                 sizeof(struct ip_vs_sync_conn));
+                 sizeof(struct ip_vs_sync_conn_v0));
 
        if (state == IP_VS_STATE_MASTER) {
-               if (sync_master_thread)
+               if (ipvs->master_thread)
                        return -EEXIST;
 
-               strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
-                       sizeof(ip_vs_master_mcast_ifn));
-               ip_vs_master_syncid = syncid;
-               realtask = &sync_master_thread;
-               name = "ipvs_syncmaster";
+               strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+                       sizeof(ipvs->master_mcast_ifn));
+               ipvs->master_syncid = syncid;
+               realtask = &ipvs->master_thread;
+               name = "ipvs_master:%d";
                threadfn = sync_thread_master;
-               sock = make_send_sock();
+               sock = make_send_sock(net);
        } else if (state == IP_VS_STATE_BACKUP) {
-               if (sync_backup_thread)
+               if (ipvs->backup_thread)
                        return -EEXIST;
 
-               strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
-                       sizeof(ip_vs_backup_mcast_ifn));
-               ip_vs_backup_syncid = syncid;
-               realtask = &sync_backup_thread;
-               name = "ipvs_syncbackup";
+               strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+                       sizeof(ipvs->backup_mcast_ifn));
+               ipvs->backup_syncid = syncid;
+               realtask = &ipvs->backup_thread;
+               name = "ipvs_backup:%d";
                threadfn = sync_thread_backup;
-               sock = make_receive_sock();
+               sock = make_receive_sock(net);
        } else {
                return -EINVAL;
        }
@@ -884,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
                goto out;
        }
 
-       set_sync_mesg_maxlen(state);
+       set_sync_mesg_maxlen(net, state);
        if (state == IP_VS_STATE_BACKUP) {
-               buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+               buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
                if (!buf)
                        goto outsocket;
        }
@@ -895,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
        if (!tinfo)
                goto outbuf;
 
+       tinfo->net = net;
        tinfo->sock = sock;
        tinfo->buf = buf;
 
-       task = kthread_run(threadfn, tinfo, name);
+       task = kthread_run(threadfn, tinfo, name, ipvs->gen);
        if (IS_ERR(task)) {
                result = PTR_ERR(task);
                goto outtinfo;
@@ -906,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
 
        /* mark as active */
        *realtask = task;
-       ip_vs_sync_state |= state;
+       ipvs->sync_state |= state;
 
        /* increase the module use count */
        ip_vs_use_count_inc();
@@ -924,16 +1608,18 @@ out:
 }
 
 
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
 {
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
        IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
 
        if (state == IP_VS_STATE_MASTER) {
-               if (!sync_master_thread)
+               if (!ipvs->master_thread)
                        return -ESRCH;
 
                pr_info("stopping master sync thread %d ...\n",
-                       task_pid_nr(sync_master_thread));
+                       task_pid_nr(ipvs->master_thread));
 
                /*
                 * The lock synchronizes with sb_queue_tail(), so that we don't
@@ -941,21 +1627,21 @@ int stop_sync_thread(int state)
                 * progress of stopping the master sync daemon.
                 */
 
-               spin_lock_bh(&ip_vs_sync_lock);
-               ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
-               spin_unlock_bh(&ip_vs_sync_lock);
-               kthread_stop(sync_master_thread);
-               sync_master_thread = NULL;
+               spin_lock_bh(&ipvs->sync_lock);
+               ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+               spin_unlock_bh(&ipvs->sync_lock);
+               kthread_stop(ipvs->master_thread);
+               ipvs->master_thread = NULL;
        } else if (state == IP_VS_STATE_BACKUP) {
-               if (!sync_backup_thread)
+               if (!ipvs->backup_thread)
                        return -ESRCH;
 
                pr_info("stopping backup sync thread %d ...\n",
-                       task_pid_nr(sync_backup_thread));
+                       task_pid_nr(ipvs->backup_thread));
 
-               ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
-               kthread_stop(sync_backup_thread);
-               sync_backup_thread = NULL;
+               ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+               kthread_stop(ipvs->backup_thread);
+               ipvs->backup_thread = NULL;
        } else {
                return -EINVAL;
        }
@@ -965,3 +1651,42 @@ int stop_sync_thread(int state)
 
        return 0;
 }
+
+/*
+ * Initialize data struct for each netns
+ */
+static int __net_init __ip_vs_sync_init(struct net *net)
+{
+       struct netns_ipvs *ipvs = net_ipvs(net);
+
+       INIT_LIST_HEAD(&ipvs->sync_queue);
+       spin_lock_init(&ipvs->sync_lock);
+       spin_lock_init(&ipvs->sync_buff_lock);
+
+       ipvs->sync_mcast_addr.sin_family = AF_INET;
+       ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+       ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
+       return 0;
+}
+
+static void __ip_vs_sync_cleanup(struct net *net)
+{
+       stop_sync_thread(net, IP_VS_STATE_MASTER);
+       stop_sync_thread(net, IP_VS_STATE_BACKUP);
+}
+
+static struct pernet_operations ipvs_sync_ops = {
+       .init = __ip_vs_sync_init,
+       .exit = __ip_vs_sync_cleanup,
+};
+
+
+int __init ip_vs_sync_init(void)
+{
+       return register_pernet_subsys(&ipvs_sync_ops);
+}
+
+void __exit ip_vs_sync_cleanup(void)
+{
+       unregister_pernet_subsys(&ipvs_sync_ops);
+}
index 5325a3f..1f2a4e3 100644 (file)
@@ -175,7 +175,6 @@ __ip_vs_reroute_locally(struct sk_buff *skb)
                        .fl4_tos = RT_TOS(iph->tos),
                        .mark = skb->mark,
                };
-               struct rtable *rt;
 
                if (ip_route_output_key(net, &rt, &fl))
                        return 0;
@@ -390,7 +389,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                ip_rt_put(rt);
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -443,7 +443,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
                                 "ip_vs_nat_xmit(): frag needed for");
@@ -658,7 +659,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -773,8 +774,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        df |= (old_iph->frag_off & htons(IP_DF));
 
-       if ((old_iph->frag_off & htons(IP_DF))
-           && mtu < ntohs(old_iph->tot_len)) {
+       if ((old_iph->frag_off & htons(IP_DF) &&
+           mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error_put;
@@ -886,7 +887,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
-       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+       if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
+           !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
@@ -991,7 +993,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
+       if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
+           !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
                ip_rt_put(rt);
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -1158,7 +1161,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
+       if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
+           !skb_is_gso(skb)) {
                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                IP_VS_DBG_RL("%s(): frag needed\n", __func__);
                goto tx_error_put;
@@ -1272,7 +1276,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
        /* MTU checking */
        mtu = dst_mtu(&rt->dst);
-       if (skb->len > mtu) {
+       if (skb->len > mtu && !skb_is_gso(skb)) {
                if (!skb->dev) {
                        struct net *net = dev_net(skb_dst(skb)->dev);
 
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
new file mode 100644 (file)
index 0000000..4e99cca
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ *      broadcast connection tracking helper
+ *
+ *      (c) 2005 Patrick McHardy <kaber@trash.net>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <net/route.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+int nf_conntrack_broadcast_help(struct sk_buff *skb,
+                               unsigned int protoff,
+                               struct nf_conn *ct,
+                               enum ip_conntrack_info ctinfo,
+                               unsigned int timeout)
+{
+       struct nf_conntrack_expect *exp;
+       struct iphdr *iph = ip_hdr(skb);
+       struct rtable *rt = skb_rtable(skb);
+       struct in_device *in_dev;
+       struct nf_conn_help *help = nfct_help(ct);
+       __be32 mask = 0;
+
+       /* we're only interested in locally generated packets */
+       if (skb->sk == NULL)
+               goto out;
+       if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
+               goto out;
+       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+               goto out;
+
+       rcu_read_lock();
+       in_dev = __in_dev_get_rcu(rt->dst.dev);
+       if (in_dev != NULL) {
+               for_primary_ifa(in_dev) {
+                       if (ifa->ifa_broadcast == iph->daddr) {
+                               mask = ifa->ifa_mask;
+                               break;
+                       }
+               } endfor_ifa(in_dev);
+       }
+       rcu_read_unlock();
+
+       if (mask == 0)
+               goto out;
+
+       exp = nf_ct_expect_alloc(ct);
+       if (exp == NULL)
+               goto out;
+
+       exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+       exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;
+
+       exp->mask.src.u3.ip       = mask;
+       exp->mask.src.u.udp.port  = htons(0xFFFF);
+
+       exp->expectfn             = NULL;
+       exp->flags                = NF_CT_EXPECT_PERMANENT;
+       exp->class                = NF_CT_EXPECT_CLASS_DEFAULT;
+       exp->helper               = NULL;
+
+       nf_ct_expect_related(exp);
+       nf_ct_expect_put(exp);
+
+       nf_ct_refresh(ct, skb, timeout * HZ);
+out:
+       return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
+
+MODULE_LICENSE("GPL");
index e615119..1909311 100644 (file)
@@ -43,6 +43,7 @@
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 
@@ -282,6 +283,11 @@ EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
 static void death_by_timeout(unsigned long ul_conntrack)
 {
        struct nf_conn *ct = (void *)ul_conntrack;
+       struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp && tstamp->stop == 0)
+               tstamp->stop = ktime_to_ns(ktime_get_real());
 
        if (!test_bit(IPS_DYING_BIT, &ct->status) &&
            unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
@@ -419,6 +425,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;
        struct nf_conn_help *help;
+       struct nf_conn_tstamp *tstamp;
        struct hlist_nulls_node *n;
        enum ip_conntrack_info ctinfo;
        struct net *net;
@@ -486,8 +493,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        ct->timeout.expires += jiffies;
        add_timer(&ct->timeout);
        atomic_inc(&ct->ct_general.use);
-       set_bit(IPS_CONFIRMED_BIT, &ct->status);
+       ct->status |= IPS_CONFIRMED;
+
+       /* set conntrack timestamp, if enabled. */
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp) {
+               if (skb->tstamp.tv64 == 0)
+                       __net_timestamp((struct sk_buff *)skb);
 
+               tstamp->start = ktime_to_ns(skb->tstamp);
+       }
        /* Since the lookup is lockless, hash insertion must be done after
         * starting the timer and setting the CONFIRMED bit. The RCU barriers
         * guarantee that no other CPU can find the conntrack before the above
@@ -655,7 +670,8 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
         * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
         */
        memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
-              sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
+              offsetof(struct nf_conn, proto) -
+              offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
        spin_lock_init(&ct->lock);
        ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -745,6 +761,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
        }
 
        nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+       nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
 
        ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
        nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
@@ -1185,6 +1202,11 @@ struct __nf_ct_flush_report {
 static int kill_report(struct nf_conn *i, void *data)
 {
        struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
+       struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(i);
+       if (tstamp && tstamp->stop == 0)
+               tstamp->stop = ktime_to_ns(ktime_get_real());
 
        /* If we fail to deliver the event, death_by_timeout() will retry */
        if (nf_conntrack_event_report(IPCT_DESTROY, i,
@@ -1201,9 +1223,9 @@ static int kill_all(struct nf_conn *i, void *data)
        return 1;
 }
 
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
 {
-       if (vmalloced)
+       if (is_vmalloc_addr(hash))
                vfree(hash);
        else
                free_pages((unsigned long)hash,
@@ -1270,8 +1292,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
                goto i_see_dead_people;
        }
 
-       nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-                            net->ct.htable_size);
+       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
        nf_conntrack_ecache_fini(net);
        nf_conntrack_acct_fini(net);
        nf_conntrack_expect_fini(net);
@@ -1300,21 +1321,18 @@ void nf_conntrack_cleanup(struct net *net)
        }
 }
 
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
 {
        struct hlist_nulls_head *hash;
        unsigned int nr_slots, i;
        size_t sz;
 
-       *vmalloced = 0;
-
        BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
        nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
        sz = nr_slots * sizeof(struct hlist_nulls_head);
        hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
                                        get_order(sz));
        if (!hash) {
-               *vmalloced = 1;
                printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
                hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                                 PAGE_KERNEL);
@@ -1330,7 +1348,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
-       int i, bucket, vmalloced, old_vmalloced;
+       int i, bucket;
        unsigned int hashsize, old_size;
        struct hlist_nulls_head *hash, *old_hash;
        struct nf_conntrack_tuple_hash *h;
@@ -1347,7 +1365,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
        if (!hashsize)
                return -EINVAL;
 
-       hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+       hash = nf_ct_alloc_hashtable(&hashsize, 1);
        if (!hash)
                return -ENOMEM;
 
@@ -1369,15 +1387,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
                }
        }
        old_size = init_net.ct.htable_size;
-       old_vmalloced = init_net.ct.hash_vmalloc;
        old_hash = init_net.ct.hash;
 
        init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-       init_net.ct.hash_vmalloc = vmalloced;
        init_net.ct.hash = hash;
        spin_unlock_bh(&nf_conntrack_lock);
 
-       nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+       nf_ct_free_hashtable(old_hash, old_size);
        return 0;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1490,8 +1506,7 @@ static int nf_conntrack_init_net(struct net *net)
        }
 
        net->ct.htable_size = nf_conntrack_htable_size;
-       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
-                                            &net->ct.hash_vmalloc, 1);
+       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
        if (!net->ct.hash) {
                ret = -ENOMEM;
                printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1503,6 +1518,9 @@ static int nf_conntrack_init_net(struct net *net)
        ret = nf_conntrack_acct_init(net);
        if (ret < 0)
                goto err_acct;
+       ret = nf_conntrack_tstamp_init(net);
+       if (ret < 0)
+               goto err_tstamp;
        ret = nf_conntrack_ecache_init(net);
        if (ret < 0)
                goto err_ecache;
@@ -1510,12 +1528,13 @@ static int nf_conntrack_init_net(struct net *net)
        return 0;
 
 err_ecache:
+       nf_conntrack_tstamp_fini(net);
+err_tstamp:
        nf_conntrack_acct_fini(net);
 err_acct:
        nf_conntrack_expect_fini(net);
 err_expect:
-       nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-                            net->ct.htable_size);
+       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
 err_hash:
        kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
index a20fb0b..cd1e8e0 100644 (file)
@@ -319,7 +319,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
-       atomic_inc(&exp->use);
+       /* two references : one for hash insert, one for the timer */
+       atomic_add(2, &exp->use);
 
        if (master_help) {
                hlist_add_head(&exp->lnode, &master_help->expectations);
@@ -333,12 +334,14 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        if (master_help) {
-               p = &master_help->helper->expect_policy[exp->class];
+               p = &rcu_dereference_protected(
+                               master_help->helper,
+                               lockdep_is_held(&nf_conntrack_lock)
+                               )->expect_policy[exp->class];
                exp->timeout.expires = jiffies + p->timeout * HZ;
        }
        add_timer(&exp->timeout);
 
-       atomic_inc(&exp->use);
        NF_CT_STAT_INC(net, expect_create);
 }
 
@@ -369,7 +372,10 @@ static inline int refresh_timer(struct nf_conntrack_expect *i)
        if (!del_timer(&i->timeout))
                return 0;
 
-       p = &master_help->helper->expect_policy[i->class];
+       p = &rcu_dereference_protected(
+               master_help->helper,
+               lockdep_is_held(&nf_conntrack_lock)
+               )->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
@@ -407,7 +413,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
        }
        /* Will be over limit? */
        if (master_help) {
-               p = &master_help->helper->expect_policy[expect->class];
+               p = &rcu_dereference_protected(
+                       master_help->helper,
+                       lockdep_is_held(&nf_conntrack_lock)
+                       )->expect_policy[expect->class];
                if (p->max_expected &&
                    master_help->expecting[expect->class] >= p->max_expected) {
                        evict_oldest_expect(master, expect);
@@ -478,7 +487,7 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
        struct hlist_node *n;
 
        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-               n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
                if (n)
                        return n;
        }
@@ -491,11 +500,11 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_next_rcu(head));
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
-               head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+               head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
        }
        return head;
 }
@@ -630,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
        }
 
        net->ct.expect_count = 0;
-       net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
-                                                 &net->ct.expect_vmalloc, 0);
+       net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
        if (net->ct.expect_hash == NULL)
                goto err1;
 
@@ -653,8 +661,7 @@ err3:
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
 err2:
-       nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-                            nf_ct_expect_hsize);
+       nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 err1:
        return err;
 }
@@ -666,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
                rcu_barrier(); /* Wait for call_rcu() before destroy */
                kmem_cache_destroy(nf_ct_expect_cachep);
        }
-       nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
-                            nf_ct_expect_hsize);
+       nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
 }
index bd82450..80a23ed 100644 (file)
@@ -140,15 +140,16 @@ static void update_alloc_size(struct nf_ct_ext_type *type)
        /* This assumes that extended areas in conntrack for the types
           whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */
        for (i = min; i <= max; i++) {
-               t1 = nf_ct_ext_types[i];
+               t1 = rcu_dereference_protected(nf_ct_ext_types[i],
+                               lockdep_is_held(&nf_ct_ext_type_mutex));
                if (!t1)
                        continue;
 
-               t1->alloc_size = sizeof(struct nf_ct_ext)
-                                + ALIGN(sizeof(struct nf_ct_ext), t1->align)
-                                + t1->len;
+               t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +
+                                t1->len;
                for (j = 0; j < NF_CT_EXT_NUM; j++) {
-                       t2 = nf_ct_ext_types[j];
+                       t2 = rcu_dereference_protected(nf_ct_ext_types[j],
+                               lockdep_is_held(&nf_ct_ext_type_mutex));
                        if (t2 == NULL || t2 == t1 ||
                            (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)
                                continue;
index 59e1a4c..1bdfea3 100644 (file)
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
 static struct hlist_head *nf_ct_helper_hash __read_mostly;
 static unsigned int nf_ct_helper_hsize __read_mostly;
 static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
 
 
 /* Stupid hash, but collision free for the default registrations of the
@@ -158,7 +157,10 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
        struct nf_conn_help *help = nfct_help(ct);
 
-       if (help && help->helper == me) {
+       if (help && rcu_dereference_protected(
+                       help->helper,
+                       lockdep_is_held(&nf_conntrack_lock)
+                       ) == me) {
                nf_conntrack_event(IPCT_HELPER, ct);
                rcu_assign_pointer(help->helper, NULL);
        }
@@ -210,7 +212,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
                hlist_for_each_entry_safe(exp, n, next,
                                          &net->ct.expect_hash[i], hnode) {
                        struct nf_conn_help *help = nfct_help(exp->master);
-                       if ((help->helper == me || exp->helper == me) &&
+                       if ((rcu_dereference_protected(
+                                       help->helper,
+                                       lockdep_is_held(&nf_conntrack_lock)
+                                       ) == me || exp->helper == me) &&
                            del_timer(&exp->timeout)) {
                                nf_ct_unlink_expect(exp);
                                nf_ct_expect_put(exp);
@@ -261,8 +266,7 @@ int nf_conntrack_helper_init(void)
        int err;
 
        nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
-       nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
-                                                 &nf_ct_helper_vmalloc, 0);
+       nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
        if (!nf_ct_helper_hash)
                return -ENOMEM;
 
@@ -273,14 +277,12 @@ int nf_conntrack_helper_init(void)
        return 0;
 
 err1:
-       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-                            nf_ct_helper_hsize);
+       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
        return err;
 }
 
 void nf_conntrack_helper_fini(void)
 {
        nf_ct_extend_unregister(&helper_extend);
-       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
-                            nf_ct_helper_hsize);
+       nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
 }
index aadde01..4c8f30a 100644 (file)
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/if_addr.h>
 #include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <net/route.h>
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -40,75 +33,26 @@ MODULE_ALIAS("ip_conntrack_netbios_ns");
 MODULE_ALIAS_NFCT_HELPER("netbios_ns");
 
 static unsigned int timeout __read_mostly = 3;
-module_param(timeout, uint, 0400);
+module_param(timeout, uint, S_IRUSR);
 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
 
-static int help(struct sk_buff *skb, unsigned int protoff,
-               struct nf_conn *ct, enum ip_conntrack_info ctinfo)
-{
-       struct nf_conntrack_expect *exp;
-       struct iphdr *iph = ip_hdr(skb);
-       struct rtable *rt = skb_rtable(skb);
-       struct in_device *in_dev;
-       __be32 mask = 0;
-
-       /* we're only interested in locally generated packets */
-       if (skb->sk == NULL)
-               goto out;
-       if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
-               goto out;
-       if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
-               goto out;
-
-       rcu_read_lock();
-       in_dev = __in_dev_get_rcu(rt->dst.dev);
-       if (in_dev != NULL) {
-               for_primary_ifa(in_dev) {
-                       if (ifa->ifa_broadcast == iph->daddr) {
-                               mask = ifa->ifa_mask;
-                               break;
-                       }
-               } endfor_ifa(in_dev);
-       }
-       rcu_read_unlock();
-
-       if (mask == 0)
-               goto out;
-
-       exp = nf_ct_expect_alloc(ct);
-       if (exp == NULL)
-               goto out;
-
-       exp->tuple                = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-       exp->tuple.src.u.udp.port = htons(NMBD_PORT);
-
-       exp->mask.src.u3.ip       = mask;
-       exp->mask.src.u.udp.port  = htons(0xFFFF);
-
-       exp->expectfn             = NULL;
-       exp->flags                = NF_CT_EXPECT_PERMANENT;
-       exp->class                = NF_CT_EXPECT_CLASS_DEFAULT;
-       exp->helper               = NULL;
-
-       nf_ct_expect_related(exp);
-       nf_ct_expect_put(exp);
-
-       nf_ct_refresh(ct, skb, timeout * HZ);
-out:
-       return NF_ACCEPT;
-}
-
 static struct nf_conntrack_expect_policy exp_policy = {
        .max_expected   = 1,
 };
 
+static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff,
+                  struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+       return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+}
+
 static struct nf_conntrack_helper helper __read_mostly = {
        .name                   = "netbios-ns",
-       .tuple.src.l3num        = AF_INET,
+       .tuple.src.l3num        = NFPROTO_IPV4,
        .tuple.src.u.udp.port   = cpu_to_be16(NMBD_PORT),
        .tuple.dst.protonum     = IPPROTO_UDP,
        .me                     = THIS_MODULE,
-       .help                   = help,
+       .help                   = netbios_ns_help,
        .expect_policy          = &exp_policy,
 };
 
index 93297aa..3fec12c 100644 (file)
@@ -42,6 +42,7 @@
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
 #ifdef CONFIG_NF_NAT_NEEDED
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_protocol.h>
@@ -230,6 +231,33 @@ nla_put_failure:
        return -1;
 }
 
+static int
+ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+{
+       struct nlattr *nest_count;
+       const struct nf_conn_tstamp *tstamp;
+
+       tstamp = nf_conn_tstamp_find(ct);
+       if (!tstamp)
+               return 0;
+
+       nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED);
+       if (!nest_count)
+               goto nla_put_failure;
+
+       NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start));
+       if (tstamp->stop != 0) {
+               NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP,
+                            cpu_to_be64(tstamp->stop));
+       }
+       nla_nest_end(skb, nest_count);
+
+       return 0;
+
+nla_put_failure:
+       return -1;
+}
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
 static inline int
 ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
@@ -404,6 +432,7 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
            ctnetlink_dump_timeout(skb, ct) < 0 ||
            ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
            ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+           ctnetlink_dump_timestamp(skb, ct) < 0 ||
            ctnetlink_dump_protoinfo(skb, ct) < 0 ||
            ctnetlink_dump_helpinfo(skb, ct) < 0 ||
            ctnetlink_dump_mark(skb, ct) < 0 ||
@@ -470,6 +499,18 @@ ctnetlink_secctx_size(const struct nf_conn *ct)
 #endif
 }
 
+static inline size_t
+ctnetlink_timestamp_size(const struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+       if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP))
+               return 0;
+       return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t));
+#else
+       return 0;
+#endif
+}
+
 static inline size_t
 ctnetlink_nlmsg_size(const struct nf_conn *ct)
 {
@@ -481,6 +522,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
               + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
               + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
               + ctnetlink_counters_size(ct)
+              + ctnetlink_timestamp_size(ct)
               + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
               + nla_total_size(0) /* CTA_PROTOINFO */
               + nla_total_size(0) /* CTA_HELP */
@@ -571,7 +613,8 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 
        if (events & (1 << IPCT_DESTROY)) {
                if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
-                   ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+                   ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+                   ctnetlink_dump_timestamp(skb, ct) < 0)
                        goto nla_put_failure;
        } else {
                if (ctnetlink_dump_timeout(skb, ct) < 0)
@@ -1357,6 +1400,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
        }
 
        nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+       nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
        nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
        /* we must add conntrack extensions before confirmation. */
        ct->status |= IPS_CONFIRMED;
@@ -1375,6 +1419,7 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
        }
 #endif
 
+       memset(&ct->proto, 0, sizeof(ct->proto));
        if (cda[CTA_PROTOINFO]) {
                err = ctnetlink_change_protoinfo(ct, cda);
                if (err < 0)
index dc7bb74..5701c8d 100644 (file)
@@ -166,6 +166,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct nf_conntrack_l3proto *l3proto
 int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
        int ret = 0;
+       struct nf_conntrack_l3proto *old;
 
        if (proto->l3proto >= AF_MAX)
                return -EBUSY;
@@ -174,7 +175,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
                return -EINVAL;
 
        mutex_lock(&nf_ct_proto_mutex);
-       if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
+       old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+                                       lockdep_is_held(&nf_ct_proto_mutex));
+       if (old != &nf_conntrack_l3proto_generic) {
                ret = -EBUSY;
                goto out_unlock;
        }
@@ -201,7 +204,9 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
        BUG_ON(proto->l3proto >= AF_MAX);
 
        mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(nf_ct_l3protos[proto->l3proto] != proto);
+       BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto],
+                                        lockdep_is_held(&nf_ct_proto_mutex)
+                                        ) != proto);
        rcu_assign_pointer(nf_ct_l3protos[proto->l3proto],
                           &nf_conntrack_l3proto_generic);
        nf_ct_l3proto_unregister_sysctl(proto);
@@ -279,7 +284,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
        mutex_lock(&nf_ct_proto_mutex);
        if (!nf_ct_protos[l4proto->l3proto]) {
                /* l3proto may be loaded latter. */
-               struct nf_conntrack_l4proto **proto_array;
+               struct nf_conntrack_l4proto __rcu **proto_array;
                int i;
 
                proto_array = kmalloc(MAX_NF_CT_PROTO *
@@ -291,7 +296,7 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                }
 
                for (i = 0; i < MAX_NF_CT_PROTO; i++)
-                       proto_array[i] = &nf_conntrack_l4proto_generic;
+                       RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
 
                /* Before making proto_array visible to lockless readers,
                 * we must make sure its content is committed to memory.
@@ -299,8 +304,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                smp_wmb();
 
                nf_ct_protos[l4proto->l3proto] = proto_array;
-       } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] !=
-                                       &nf_conntrack_l4proto_generic) {
+       } else if (rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       lockdep_is_held(&nf_ct_proto_mutex)
+                       ) != &nf_conntrack_l4proto_generic) {
                ret = -EBUSY;
                goto out_unlock;
        }
@@ -331,7 +338,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
        BUG_ON(l4proto->l3proto >= PF_MAX);
 
        mutex_lock(&nf_ct_proto_mutex);
-       BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto);
+       BUG_ON(rcu_dereference_protected(
+                       nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
+                       lockdep_is_held(&nf_ct_proto_mutex)
+                       ) != l4proto);
        rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
                           &nf_conntrack_l4proto_generic);
        nf_ct_l4proto_unregister_sysctl(l4proto);
index 5292560..9ae57c5 100644 (file)
@@ -452,6 +452,9 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
        ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
        ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER;
        ct->proto.dccp.state = CT_DCCP_NONE;
+       ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST;
+       ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL;
+       ct->proto.dccp.handshake_seq = 0;
        return true;
 
 out_invalid:
index c6049c2..6f4ee70 100644 (file)
@@ -413,6 +413,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
            test_bit(SCTP_CID_COOKIE_ACK, map))
                return false;
 
+       memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp));
        new_state = SCTP_CONNTRACK_MAX;
        for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
                /* Don't need lock here: this conntrack not in circulation yet */
index 3fb2b73..6f38d0e 100644 (file)
@@ -1066,9 +1066,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
        BUG_ON(th == NULL);
 
        /* Don't need lock here: this conntrack not in circulation yet */
-       new_state
-               = tcp_conntracks[0][get_conntrack_index(th)]
-               [TCP_CONNTRACK_NONE];
+       new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
 
        /* Invalid: delete conntrack */
        if (new_state >= TCP_CONNTRACK_MAX) {
@@ -1077,6 +1075,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
        }
 
        if (new_state == TCP_CONNTRACK_SYN_SENT) {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
                /* SYN packet */
                ct->proto.tcp.seen[0].td_end =
                        segment_seq_plus_len(ntohl(th->seq), skb->len,
@@ -1088,11 +1087,11 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                        ct->proto.tcp.seen[0].td_end;
 
                tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
-               ct->proto.tcp.seen[1].flags = 0;
        } else if (nf_ct_tcp_loose == 0) {
                /* Don't try to pick up connections. */
                return false;
        } else {
+               memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
                /*
                 * We are in the middle of a connection,
                 * its history is lost for us.
@@ -1107,7 +1106,6 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                ct->proto.tcp.seen[0].td_maxend =
                        ct->proto.tcp.seen[0].td_end +
                        ct->proto.tcp.seen[0].td_maxwin;
-               ct->proto.tcp.seen[0].td_scale = 0;
 
                /* We assume SACK and liberal window checking to handle
                 * window scaling */
@@ -1116,13 +1114,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
                                              IP_CT_TCP_FLAG_BE_LIBERAL;
        }
 
-       ct->proto.tcp.seen[1].td_end = 0;
-       ct->proto.tcp.seen[1].td_maxend = 0;
-       ct->proto.tcp.seen[1].td_maxwin = 0;
-       ct->proto.tcp.seen[1].td_scale = 0;
-
        /* tcp_packet will set them */
-       ct->proto.tcp.state = TCP_CONNTRACK_NONE;
        ct->proto.tcp.last_index = TCP_NONE_SET;
 
        pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
diff --git a/net/netfilter/nf_conntrack_snmp.c b/net/netfilter/nf_conntrack_snmp.c
new file mode 100644 (file)
index 0000000..6e545e2
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ *      SNMP service broadcast connection tracking helper
+ *
+ *      (c) 2011 Jiri Olsa <jolsa@redhat.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/in.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
+#define SNMP_PORT      161
+
+MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>");
+MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFCT_HELPER("snmp");
+
+static unsigned int timeout __read_mostly = 30;
+module_param(timeout, uint, S_IRUSR);
+MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
+
+int (*nf_nat_snmp_hook)(struct sk_buff *skb,
+                       unsigned int protoff,
+                       struct nf_conn *ct,
+                       enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL_GPL(nf_nat_snmp_hook);
+
+static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff,
+               struct nf_conn *ct, enum ip_conntrack_info ctinfo)
+{
+       typeof(nf_nat_snmp_hook) nf_nat_snmp;
+
+       nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout);
+
+       nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook);
+       if (nf_nat_snmp && ct->status & IPS_NAT_MASK)
+               return nf_nat_snmp(skb, protoff, ct, ctinfo);
+
+       return NF_ACCEPT;
+}
+
+static struct nf_conntrack_expect_policy exp_policy = {
+       .max_expected   = 1,
+};
+
+static struct nf_conntrack_helper helper __read_mostly = {
+       .name                   = "snmp",
+       .tuple.src.l3num        = NFPROTO_IPV4,
+       .tuple.src.u.udp.port   = cpu_to_be16(SNMP_PORT),
+       .tuple.dst.protonum     = IPPROTO_UDP,
+       .me                     = THIS_MODULE,
+       .help                   = snmp_conntrack_help,
+       .expect_policy          = &exp_policy,
+};
+
+static int __init nf_conntrack_snmp_init(void)
+{
+       exp_policy.timeout = timeout;
+       return nf_conntrack_helper_register(&helper);
+}
+
+static void __exit nf_conntrack_snmp_fini(void)
+{
+       nf_conntrack_helper_unregister(&helper);
+}
+
+module_init(nf_conntrack_snmp_init);
+module_exit(nf_conntrack_snmp_fini);
index b4d7f0f..0ae1428 100644 (file)
@@ -29,6 +29,8 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+#include <linux/rculist_nulls.h>
 
 MODULE_LICENSE("GPL");
 
@@ -45,6 +47,7 @@ EXPORT_SYMBOL_GPL(print_tuple);
 struct ct_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
+       u_int64_t time_now;
 };
 
 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
@@ -56,7 +59,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
        for (st->bucket = 0;
             st->bucket < net->ct.htable_size;
             st->bucket++) {
-               n = rcu_dereference(net->ct.hash[st->bucket].first);
+               n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
                if (!is_a_nulls(n))
                        return n;
        }
@@ -69,13 +72,15 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
        struct net *net = seq_file_net(seq);
        struct ct_iter_state *st = seq->private;
 
-       head = rcu_dereference(head->next);
+       head = rcu_dereference(hlist_nulls_next_rcu(head));
        while (is_a_nulls(head)) {
                if (likely(get_nulls_value(head) == st->bucket)) {
                        if (++st->bucket >= net->ct.htable_size)
                                return NULL;
                }
-               head = rcu_dereference(net->ct.hash[st->bucket].first);
+               head = rcu_dereference(
+                               hlist_nulls_first_rcu(
+                                       &net->ct.hash[st->bucket]));
        }
        return head;
 }
@@ -93,6 +98,9 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
 static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
 {
+       struct ct_iter_state *st = seq->private;
+
+       st->time_now = ktime_to_ns(ktime_get_real());
        rcu_read_lock();
        return ct_get_idx(seq, *pos);
 }
@@ -132,6 +140,34 @@ static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
 }
 #endif
 
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+       struct ct_iter_state *st = s->private;
+       struct nf_conn_tstamp *tstamp;
+       s64 delta_time;
+
+       tstamp = nf_conn_tstamp_find(ct);
+       if (tstamp) {
+               delta_time = st->time_now - tstamp->start;
+               if (delta_time > 0)
+                       delta_time = div_s64(delta_time, NSEC_PER_SEC);
+               else
+                       delta_time = 0;
+
+               return seq_printf(s, "delta-time=%llu ",
+                                 (unsigned long long)delta_time);
+       }
+       return 0;
+}
+#else
+static inline int
+ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
+{
+       return 0;
+}
+#endif
+
 /* return 0 on success, 1 in case of error */
 static int ct_seq_show(struct seq_file *s, void *v)
 {
@@ -200,6 +236,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
                goto release;
 #endif
 
+       if (ct_show_delta_time(s, ct))
+               goto release;
+
        if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
                goto release;
 
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
new file mode 100644 (file)
index 0000000..af7dd31
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+
+#include <linux/netfilter.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_timestamp.h>
+
+static int nf_ct_tstamp __read_mostly;
+
+module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
+MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table tstamp_sysctl_table[] = {
+       {
+               .procname       = "nf_conntrack_timestamp",
+               .data           = &init_net.ct.sysctl_tstamp,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+       {}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type tstamp_extend __read_mostly = {
+       .len    = sizeof(struct nf_conn_tstamp),
+       .align  = __alignof__(struct nf_conn_tstamp),
+       .id     = NF_CT_EXT_TSTAMP,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+       struct ctl_table *table;
+
+       table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
+                       GFP_KERNEL);
+       if (!table)
+               goto out;
+
+       table[0].data = &net->ct.sysctl_tstamp;
+
+       net->ct.tstamp_sysctl_header = register_net_sysctl_table(net,
+                       nf_net_netfilter_sysctl_path, table);
+       if (!net->ct.tstamp_sysctl_header) {
+               printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n");
+               goto out_register;
+       }
+       return 0;
+
+out_register:
+       kfree(table);
+out:
+       return -ENOMEM;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+       struct ctl_table *table;
+
+       table = net->ct.tstamp_sysctl_header->ctl_table_arg;
+       unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
+       kfree(table);
+}
+#else
+static int nf_conntrack_tstamp_init_sysctl(struct net *net)
+{
+       return 0;
+}
+
+static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
+{
+}
+#endif
+
+int nf_conntrack_tstamp_init(struct net *net)
+{
+       int ret;
+
+       net->ct.sysctl_tstamp = nf_ct_tstamp;
+
+       if (net_eq(net, &init_net)) {
+               ret = nf_ct_extend_register(&tstamp_extend);
+               if (ret < 0) {
+                       printk(KERN_ERR "nf_ct_tstamp: Unable to register "
+                                       "extension\n");
+                       goto out_extend_register;
+               }
+       }
+
+       ret = nf_conntrack_tstamp_init_sysctl(net);
+       if (ret < 0)
+               goto out_sysctl;
+
+       return 0;
+
+out_sysctl:
+       if (net_eq(net, &init_net))
+               nf_ct_extend_unregister(&tstamp_extend);
+out_extend_register:
+       return ret;
+}
+
+void nf_conntrack_tstamp_fini(struct net *net)
+{
+       nf_conntrack_tstamp_fini_sysctl(net);
+       if (net_eq(net, &init_net))
+               nf_ct_extend_unregister(&tstamp_extend);
+}
index b07393e..20c775c 100644 (file)
@@ -161,7 +161,8 @@ static int seq_show(struct seq_file *s, void *v)
        struct nf_logger *t;
        int ret;
 
-       logger = nf_loggers[*pos];
+       logger = rcu_dereference_protected(nf_loggers[*pos],
+                                          lockdep_is_held(&nf_log_mutex));
 
        if (!logger)
                ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -249,7 +250,8 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
                mutex_unlock(&nf_log_mutex);
        } else {
                mutex_lock(&nf_log_mutex);
-               logger = nf_loggers[tindex];
+               logger = rcu_dereference_protected(nf_loggers[tindex],
+                                                  lockdep_is_held(&nf_log_mutex));
                if (!logger)
                        table->data = "NONE";
                else
index 74aebed..5ab22e2 100644 (file)
@@ -27,14 +27,17 @@ static DEFINE_MUTEX(queue_handler_mutex);
 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
        int ret;
+       const struct nf_queue_handler *old;
 
        if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] == qh)
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old == qh)
                ret = -EEXIST;
-       else if (queue_handler[pf])
+       else if (old)
                ret = -EBUSY;
        else {
                rcu_assign_pointer(queue_handler[pf], qh);
@@ -49,11 +52,15 @@ EXPORT_SYMBOL(nf_register_queue_handler);
 /* The caller must flush their queue before this */
 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
 {
+       const struct nf_queue_handler *old;
+
        if (pf >= ARRAY_SIZE(queue_handler))
                return -EINVAL;
 
        mutex_lock(&queue_handler_mutex);
-       if (queue_handler[pf] && queue_handler[pf] != qh) {
+       old = rcu_dereference_protected(queue_handler[pf],
+                                       lockdep_is_held(&queue_handler_mutex));
+       if (old && old != qh) {
                mutex_unlock(&queue_handler_mutex);
                return -EINVAL;
        }
@@ -73,7 +80,10 @@ void nf_unregister_queue_handlers(const struct nf_queue_handler *qh)
 
        mutex_lock(&queue_handler_mutex);
        for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++)  {
-               if (queue_handler[pf] == qh)
+               if (rcu_dereference_protected(
+                               queue_handler[pf],
+                               lockdep_is_held(&queue_handler_mutex)
+                               ) == qh)
                        rcu_assign_pointer(queue_handler[pf], NULL);
        }
        mutex_unlock(&queue_handler_mutex);
@@ -115,7 +125,7 @@ static int __nf_queue(struct sk_buff *skb,
                      int (*okfn)(struct sk_buff *),
                      unsigned int queuenum)
 {
-       int status;
+       int status = -ENOENT;
        struct nf_queue_entry *entry = NULL;
 #ifdef CONFIG_BRIDGE_NETFILTER
        struct net_device *physindev;
@@ -128,16 +138,20 @@ static int __nf_queue(struct sk_buff *skb,
        rcu_read_lock();
 
        qh = rcu_dereference(queue_handler[pf]);
-       if (!qh)
+       if (!qh) {
+               status = -ESRCH;
                goto err_unlock;
+       }
 
        afinfo = nf_get_afinfo(pf);
        if (!afinfo)
                goto err_unlock;
 
        entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
-       if (!entry)
+       if (!entry) {
+               status = -ENOMEM;
                goto err_unlock;
+       }
 
        *entry = (struct nf_queue_entry) {
                .skb    = skb,
@@ -151,11 +165,9 @@ static int __nf_queue(struct sk_buff *skb,
 
        /* If it's going away, ignore hook. */
        if (!try_module_get(entry->elem->owner)) {
-               rcu_read_unlock();
-               kfree(entry);
-               return 0;
+               status = -ECANCELED;
+               goto err_unlock;
        }
-
        /* Bump dev refs so they don't vanish while packet is out */
        if (indev)
                dev_hold(indev);
@@ -182,14 +194,13 @@ static int __nf_queue(struct sk_buff *skb,
                goto err;
        }
 
-       return 1;
+       return 0;
 
 err_unlock:
        rcu_read_unlock();
 err:
-       kfree_skb(skb);
        kfree(entry);
-       return 1;
+       return status;
 }
 
 int nf_queue(struct sk_buff *skb,
@@ -201,6 +212,8 @@ int nf_queue(struct sk_buff *skb,
             unsigned int queuenum)
 {
        struct sk_buff *segs;
+       int err;
+       unsigned int queued;
 
        if (!skb_is_gso(skb))
                return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
@@ -216,20 +229,35 @@ int nf_queue(struct sk_buff *skb,
        }
 
        segs = skb_gso_segment(skb, 0);
-       kfree_skb(skb);
+       /* Does not use PTR_ERR to limit the number of error codes that can be
+        * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean
+        * 'ignore this hook'.
+        */
        if (IS_ERR(segs))
-               return 1;
+               return -EINVAL;
 
+       queued = 0;
+       err = 0;
        do {
                struct sk_buff *nskb = segs->next;
 
                segs->next = NULL;
-               if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
-                               queuenum))
+               if (err == 0)
+                       err = __nf_queue(segs, elem, pf, hook, indev,
+                                          outdev, okfn, queuenum);
+               if (err == 0)
+                       queued++;
+               else
                        kfree_skb(segs);
                segs = nskb;
        } while (segs);
-       return 1;
+
+       /* also free orig skb if only some segments were queued */
+       if (unlikely(err && queued))
+               err = 0;
+       if (err == 0)
+               kfree_skb(skb);
+       return err;
 }
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
@@ -237,6 +265,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
        struct sk_buff *skb = entry->skb;
        struct list_head *elem = &entry->elem->list;
        const struct nf_afinfo *afinfo;
+       int err;
 
        rcu_read_lock();
 
@@ -270,10 +299,17 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                local_bh_enable();
                break;
        case NF_QUEUE:
-               if (!__nf_queue(skb, elem, entry->pf, entry->hook,
-                               entry->indev, entry->outdev, entry->okfn,
-                               verdict >> NF_VERDICT_BITS))
-                       goto next_hook;
+               err = __nf_queue(skb, elem, entry->pf, entry->hook,
+                                entry->indev, entry->outdev, entry->okfn,
+                                verdict >> NF_VERDICT_QBITS);
+               if (err < 0) {
+                       if (err == -ECANCELED)
+                               goto next_hook;
+                       if (err == -ESRCH &&
+                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+                               goto next_hook;
+                       kfree_skb(skb);
+               }
                break;
        case NF_STOLEN:
        default:
index 6a1572b..91592da 100644 (file)
@@ -874,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
        for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
                if (!hlist_empty(&instance_table[st->bucket]))
-                       return rcu_dereference_bh(instance_table[st->bucket].first);
+                       return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
        }
        return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-       h = rcu_dereference_bh(h->next);
+       h = rcu_dereference_bh(hlist_next_rcu(h));
        while (!h) {
                if (++st->bucket >= INSTANCE_BUCKETS)
                        return NULL;
 
-               h = rcu_dereference_bh(instance_table[st->bucket].first);
+               h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket]));
        }
        return h;
 }
index 68e67d1..b83123f 100644 (file)
@@ -387,25 +387,31 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 {
        struct sk_buff *nskb;
        struct nfqnl_instance *queue;
-       int err;
+       int err = -ENOBUFS;
 
        /* rcu_read_lock()ed by nf_hook_slow() */
        queue = instance_lookup(queuenum);
-       if (!queue)
+       if (!queue) {
+               err = -ESRCH;
                goto err_out;
+       }
 
-       if (queue->copy_mode == NFQNL_COPY_NONE)
+       if (queue->copy_mode == NFQNL_COPY_NONE) {
+               err = -EINVAL;
                goto err_out;
+       }
 
        nskb = nfqnl_build_packet_message(queue, entry);
-       if (nskb == NULL)
+       if (nskb == NULL) {
+               err = -ENOMEM;
                goto err_out;
-
+       }
        spin_lock_bh(&queue->lock);
 
-       if (!queue->peer_pid)
+       if (!queue->peer_pid) {
+               err = -EINVAL;
                goto err_out_free_nskb;
-
+       }
        if (queue->queue_total >= queue->queue_maxlen) {
                queue->queue_dropped++;
                if (net_ratelimit())
@@ -432,7 +438,7 @@ err_out_free_nskb:
 err_out_unlock:
        spin_unlock_bh(&queue->lock);
 err_out:
-       return -1;
+       return err;
 }
 
 static int
index c942376..0a77d2f 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/audit.h>
 #include <net/net_namespace.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -38,9 +39,8 @@ MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
 
 struct compat_delta {
-       struct compat_delta *next;
-       unsigned int offset;
-       int delta;
+       unsigned int offset; /* offset in kernel */
+       int delta; /* delta in 32bit user land */
 };
 
 struct xt_af {
@@ -49,7 +49,9 @@ struct xt_af {
        struct list_head target;
 #ifdef CONFIG_COMPAT
        struct mutex compat_mutex;
-       struct compat_delta *compat_offsets;
+       struct compat_delta *compat_tab;
+       unsigned int number; /* number of slots in compat_tab[] */
+       unsigned int cur; /* number of used slots in compat_tab[] */
 #endif
 };
 
@@ -414,54 +416,67 @@ int xt_check_match(struct xt_mtchk_param *par,
 EXPORT_SYMBOL_GPL(xt_check_match);
 
 #ifdef CONFIG_COMPAT
-int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta)
+int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
 {
-       struct compat_delta *tmp;
+       struct xt_af *xp = &xt[af];
 
-       tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL);
-       if (!tmp)
-               return -ENOMEM;
+       if (!xp->compat_tab) {
+               if (!xp->number)
+                       return -EINVAL;
+               xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number);
+               if (!xp->compat_tab)
+                       return -ENOMEM;
+               xp->cur = 0;
+       }
 
-       tmp->offset = offset;
-       tmp->delta = delta;
+       if (xp->cur >= xp->number)
+               return -EINVAL;
 
-       if (xt[af].compat_offsets) {
-               tmp->next = xt[af].compat_offsets->next;
-               xt[af].compat_offsets->next = tmp;
-       } else {
-               xt[af].compat_offsets = tmp;
-               tmp->next = NULL;
-       }
+       if (xp->cur)
+               delta += xp->compat_tab[xp->cur - 1].delta;
+       xp->compat_tab[xp->cur].offset = offset;
+       xp->compat_tab[xp->cur].delta = delta;
+       xp->cur++;
        return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_add_offset);
 
 void xt_compat_flush_offsets(u_int8_t af)
 {
-       struct compat_delta *tmp, *next;
-
-       if (xt[af].compat_offsets) {
-               for (tmp = xt[af].compat_offsets; tmp; tmp = next) {
-                       next = tmp->next;
-                       kfree(tmp);
-               }
-               xt[af].compat_offsets = NULL;
+       if (xt[af].compat_tab) {
+               vfree(xt[af].compat_tab);
+               xt[af].compat_tab = NULL;
+               xt[af].number = 0;
        }
 }
 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets);
 
 int xt_compat_calc_jump(u_int8_t af, unsigned int offset)
 {
-       struct compat_delta *tmp;
-       int delta;
-
-       for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next)
-               if (tmp->offset < offset)
-                       delta += tmp->delta;
-       return delta;
+       struct compat_delta *tmp = xt[af].compat_tab;
+       int mid, left = 0, right = xt[af].cur - 1;
+
+       while (left <= right) {
+               mid = (left + right) >> 1;
+               if (offset > tmp[mid].offset)
+                       left = mid + 1;
+               else if (offset < tmp[mid].offset)
+                       right = mid - 1;
+               else
+                       return mid ? tmp[mid - 1].delta : 0;
+       }
+       WARN_ON_ONCE(1);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(xt_compat_calc_jump);
 
+void xt_compat_init_offsets(u_int8_t af, unsigned int number)
+{
+       xt[af].number = number;
+       xt[af].cur = 0;
+}
+EXPORT_SYMBOL(xt_compat_init_offsets);
+
 int xt_compat_match_offset(const struct xt_match *match)
 {
        u_int16_t csize = match->compatsize ? : match->matchsize;
@@ -820,6 +835,21 @@ xt_replace_table(struct xt_table *table,
         */
        local_bh_enable();
 
+#ifdef CONFIG_AUDIT
+       if (audit_enabled) {
+               struct audit_buffer *ab;
+
+               ab = audit_log_start(current->audit_context, GFP_KERNEL,
+                                    AUDIT_NETFILTER_CFG);
+               if (ab) {
+                       audit_log_format(ab, "table=%s family=%u entries=%u",
+                                        table->name, table->af,
+                                        private->number);
+                       audit_log_end(ab);
+               }
+       }
+#endif
+
        return private;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -1338,7 +1368,7 @@ static int __init xt_init(void)
                mutex_init(&xt[i].mutex);
 #ifdef CONFIG_COMPAT
                mutex_init(&xt[i].compat_mutex);
-               xt[i].compat_offsets = NULL;
+               xt[i].compat_tab = NULL;
 #endif
                INIT_LIST_HEAD(&xt[i].target);
                INIT_LIST_HEAD(&xt[i].match);
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
new file mode 100644 (file)
index 0000000..81802d2
--- /dev/null
@@ -0,0 +1,204 @@
+/*
+ * Creates audit record for dropped/accepted packets
+ *
+ * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
+ * (C) 2010-2011 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/audit.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_AUDIT.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>");
+MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets");
+MODULE_ALIAS("ipt_AUDIT");
+MODULE_ALIAS("ip6t_AUDIT");
+MODULE_ALIAS("ebt_AUDIT");
+MODULE_ALIAS("arpt_AUDIT");
+
+static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb,
+                       unsigned int proto, unsigned int offset)
+{
+       switch (proto) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+       case IPPROTO_UDPLITE: {
+               const __be16 *pptr;
+               __be16 _ports[2];
+
+               pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports);
+               if (pptr == NULL) {
+                       audit_log_format(ab, " truncated=1");
+                       return;
+               }
+
+               audit_log_format(ab, " sport=%hu dport=%hu",
+                                ntohs(pptr[0]), ntohs(pptr[1]));
+               }
+               break;
+
+       case IPPROTO_ICMP:
+       case IPPROTO_ICMPV6: {
+               const u8 *iptr;
+               u8 _ih[2];
+
+               iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih);
+               if (iptr == NULL) {
+                       audit_log_format(ab, " truncated=1");
+                       return;
+               }
+
+               audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu",
+                                iptr[0], iptr[1]);
+
+               }
+               break;
+       }
+}
+
+static void audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+       struct iphdr _iph;
+       const struct iphdr *ih;
+
+       ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+       if (!ih) {
+               audit_log_format(ab, " truncated=1");
+               return;
+       }
+
+       audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu",
+               &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol);
+
+       if (ntohs(ih->frag_off) & IP_OFFSET) {
+               audit_log_format(ab, " frag=1");
+               return;
+       }
+
+       audit_proto(ab, skb, ih->protocol, ih->ihl * 4);
+}
+
+static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+       struct ipv6hdr _ip6h;
+       const struct ipv6hdr *ih;
+       u8 nexthdr;
+       int offset;
+
+       ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+       if (!ih) {
+               audit_log_format(ab, " truncated=1");
+               return;
+       }
+
+       nexthdr = ih->nexthdr;
+       offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h),
+                                 &nexthdr);
+
+       audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+                        &ih->saddr, &ih->daddr, nexthdr);
+
+       if (offset)
+               audit_proto(ab, skb, nexthdr, offset);
+}
+
+static unsigned int
+audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
+{
+       const struct xt_audit_info *info = par->targinfo;
+       struct audit_buffer *ab;
+
+       ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+       if (ab == NULL)
+               goto errout;
+
+       audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
+                        info->type, par->hooknum, skb->len,
+                        par->in ? par->in->name : "?",
+                        par->out ? par->out->name : "?");
+
+       if (skb->mark)
+               audit_log_format(ab, " mark=%#x", skb->mark);
+
+       if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
+               audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x",
+                                eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+                                ntohs(eth_hdr(skb)->h_proto));
+
+               if (par->family == NFPROTO_BRIDGE) {
+                       switch (eth_hdr(skb)->h_proto) {
+                       case __constant_htons(ETH_P_IP):
+                               audit_ip4(ab, skb);
+                               break;
+
+                       case __constant_htons(ETH_P_IPV6):
+                               audit_ip6(ab, skb);
+                               break;
+                       }
+               }
+       }
+
+       switch (par->family) {
+       case NFPROTO_IPV4:
+               audit_ip4(ab, skb);
+               break;
+
+       case NFPROTO_IPV6:
+               audit_ip6(ab, skb);
+               break;
+       }
+
+       audit_log_end(ab);
+
+errout:
+       return XT_CONTINUE;
+}
+
+static int audit_tg_check(const struct xt_tgchk_param *par)
+{
+       const struct xt_audit_info *info = par->targinfo;
+
+       if (info->type > XT_AUDIT_TYPE_MAX) {
+               pr_info("Audit type out of range (valid range: 0..%hhu)\n",
+                       XT_AUDIT_TYPE_MAX);
+               return -ERANGE;
+       }
+
+       return 0;
+}
+
+static struct xt_target audit_tg_reg __read_mostly = {
+       .name           = "AUDIT",
+       .family         = NFPROTO_UNSPEC,
+       .target         = audit_tg,
+       .targetsize     = sizeof(struct xt_audit_info),
+       .checkentry     = audit_tg_check,
+       .me             = THIS_MODULE,
+};
+
+static int __init audit_tg_init(void)
+{
+       return xt_register_target(&audit_tg_reg);
+}
+
+static void __exit audit_tg_exit(void)
+{
+       xt_unregister_target(&audit_tg_reg);
+}
+
+module_init(audit_tg_init);
+module_exit(audit_tg_exit);
index c2c0e4a..af9c4da 100644 (file)
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_CLASSIFY.h>
+#include <linux/netfilter_arp.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Xtables: Qdisc classification");
 MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
+MODULE_ALIAS("arpt_CLASSIFY");
 
 static unsigned int
 classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
@@ -35,26 +37,36 @@ classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
        return XT_CONTINUE;
 }
 
-static struct xt_target classify_tg_reg __read_mostly = {
-       .name       = "CLASSIFY",
-       .revision   = 0,
-       .family     = NFPROTO_UNSPEC,
-       .table      = "mangle",
-       .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
-                     (1 << NF_INET_POST_ROUTING),
-       .target     = classify_tg,
-       .targetsize = sizeof(struct xt_classify_target_info),
-       .me         = THIS_MODULE,
+static struct xt_target classify_tg_reg[] __read_mostly = {
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+               .family     = NFPROTO_UNSPEC,
+               .hooks      = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) |
+                             (1 << NF_INET_POST_ROUTING),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
+       {
+               .name       = "CLASSIFY",
+               .revision   = 0,
+               .family     = NFPROTO_ARP,
+               .hooks      = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD),
+               .target     = classify_tg,
+               .targetsize = sizeof(struct xt_classify_target_info),
+               .me         = THIS_MODULE,
+       },
 };
 
 static int __init classify_tg_init(void)
 {
-       return xt_register_target(&classify_tg_reg);
+       return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 static void __exit classify_tg_exit(void)
 {
-       xt_unregister_target(&classify_tg_reg);
+       xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg));
 }
 
 module_init(classify_tg_init);
index be1f22e..3bdd443 100644 (file)
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
 MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
 MODULE_DESCRIPTION("Xtables: idle time monitor");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
index a414050..993de2b 100644 (file)
@@ -31,6 +31,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
 MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
 
 static LIST_HEAD(xt_led_triggers);
 static DEFINE_MUTEX(xt_led_mutex);
index 039cce1..d4f4b5d 100644 (file)
@@ -72,18 +72,31 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 
        if (info->queues_total > 1) {
                if (par->family == NFPROTO_IPV4)
-                       queue = hash_v4(skb) % info->queues_total + queue;
+                       queue = (((u64) hash_v4(skb) * info->queues_total) >>
+                                32) + queue;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
                else if (par->family == NFPROTO_IPV6)
-                       queue = hash_v6(skb) % info->queues_total + queue;
+                       queue = (((u64) hash_v6(skb) * info->queues_total) >>
+                                32) + queue;
 #endif
        }
        return NF_QUEUE_NR(queue);
 }
 
-static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static unsigned int
+nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       const struct xt_NFQ_info_v1 *info = par->targinfo;
+       const struct xt_NFQ_info_v2 *info = par->targinfo;
+       unsigned int ret = nfqueue_tg_v1(skb, par);
+
+       if (info->bypass)
+               ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+       return ret;
+}
+
+static int nfqueue_tg_check(const struct xt_tgchk_param *par)
+{
+       const struct xt_NFQ_info_v2 *info = par->targinfo;
        u32 maxid;
 
        if (unlikely(!rnd_inited)) {
@@ -100,6 +113,8 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
                       info->queues_total, maxid);
                return -ERANGE;
        }
+       if (par->target->revision == 2 && info->bypass > 1)
+               return -EINVAL;
        return 0;
 }
 
@@ -115,11 +130,20 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
                .name           = "NFQUEUE",
                .revision       = 1,
                .family         = NFPROTO_UNSPEC,
-               .checkentry     = nfqueue_tg_v1_check,
+               .checkentry     = nfqueue_tg_check,
                .target         = nfqueue_tg_v1,
                .targetsize     = sizeof(struct xt_NFQ_info_v1),
                .me             = THIS_MODULE,
        },
+       {
+               .name           = "NFQUEUE",
+               .revision       = 2,
+               .family         = NFPROTO_UNSPEC,
+               .checkentry     = nfqueue_tg_check,
+               .target         = nfqueue_tg_v2,
+               .targetsize     = sizeof(struct xt_NFQ_info_v2),
+               .me             = THIS_MODULE,
+       },
 };
 
 static int __init nfqueue_tg_init(void)
index 5c5b6b9..7fd3fd5 100644 (file)
@@ -193,10 +193,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
        if (par->family == NFPROTO_IPV6) {
                const struct ipv6hdr *iph = ipv6_hdr(skb);
-               memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr));
+               memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
+                      &iph->daddr : &iph->saddr, sizeof(addr.ip6));
        } else {
                const struct iphdr *iph = ip_hdr(skb);
-               addr.ip = iph->saddr;
+               addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
+                         iph->daddr : iph->saddr;
        }
 
        spin_lock_bh(&info->data->lock);
@@ -204,13 +206,12 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
                                 &info->mask, par->family);
        spin_unlock_bh(&info->data->lock);
 
-       if (connections < 0) {
+       if (connections < 0)
                /* kmalloc failed, drop it entirely */
-               par->hotdrop = true;
-               return false;
-       }
+               goto hotdrop;
 
-       return (connections > info->limit) ^ info->inverse;
+       return (connections > info->limit) ^
+              !!(info->flags & XT_CONNLIMIT_INVERT);
 
  hotdrop:
        par->hotdrop = true;
@@ -268,25 +269,38 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
        kfree(info->data);
 }
 
-static struct xt_match connlimit_mt_reg __read_mostly = {
-       .name       = "connlimit",
-       .revision   = 0,
-       .family     = NFPROTO_UNSPEC,
-       .checkentry = connlimit_mt_check,
-       .match      = connlimit_mt,
-       .matchsize  = sizeof(struct xt_connlimit_info),
-       .destroy    = connlimit_mt_destroy,
-       .me         = THIS_MODULE,
+static struct xt_match connlimit_mt_reg[] __read_mostly = {
+       {
+               .name       = "connlimit",
+               .revision   = 0,
+               .family     = NFPROTO_UNSPEC,
+               .checkentry = connlimit_mt_check,
+               .match      = connlimit_mt,
+               .matchsize  = sizeof(struct xt_connlimit_info),
+               .destroy    = connlimit_mt_destroy,
+               .me         = THIS_MODULE,
+       },
+       {
+               .name       = "connlimit",
+               .revision   = 1,
+               .family     = NFPROTO_UNSPEC,
+               .checkentry = connlimit_mt_check,
+               .match      = connlimit_mt,
+               .matchsize  = sizeof(struct xt_connlimit_info),
+               .destroy    = connlimit_mt_destroy,
+               .me         = THIS_MODULE,
+       },
 };
 
 static int __init connlimit_mt_init(void)
 {
-       return xt_register_match(&connlimit_mt_reg);
+       return xt_register_matches(connlimit_mt_reg,
+              ARRAY_SIZE(connlimit_mt_reg));
 }
 
 static void __exit connlimit_mt_exit(void)
 {
-       xt_unregister_match(&connlimit_mt_reg);
+       xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg));
 }
 
 module_init(connlimit_mt_init);
index e536710..4ef1b63 100644 (file)
@@ -112,6 +112,54 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
        return true;
 }
 
+static inline bool
+port_match(u16 min, u16 max, u16 port, bool invert)
+{
+       return (port >= min && port <= max) ^ invert;
+}
+
+static inline bool
+ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info,
+                      const struct nf_conn *ct)
+{
+       const struct nf_conntrack_tuple *tuple;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+       if ((info->match_flags & XT_CONNTRACK_PROTO) &&
+           (nf_ct_protonum(ct) == info->l4proto) ^
+           !(info->invert_flags & XT_CONNTRACK_PROTO))
+               return false;
+
+       /* Shortcut to match all recognized protocols by using ->src.all. */
+       if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) &&
+           !port_match(info->origsrc_port, info->origsrc_port_high,
+                       ntohs(tuple->src.u.all),
+                       info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT))
+               return false;
+
+       if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) &&
+           !port_match(info->origdst_port, info->origdst_port_high,
+                       ntohs(tuple->dst.u.all),
+                       info->invert_flags & XT_CONNTRACK_ORIGDST_PORT))
+               return false;
+
+       tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+       if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) &&
+           !port_match(info->replsrc_port, info->replsrc_port_high,
+                       ntohs(tuple->src.u.all),
+                       info->invert_flags & XT_CONNTRACK_REPLSRC_PORT))
+               return false;
+
+       if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) &&
+           !port_match(info->repldst_port, info->repldst_port_high,
+                       ntohs(tuple->dst.u.all),
+                       info->invert_flags & XT_CONNTRACK_REPLDST_PORT))
+               return false;
+
+       return true;
+}
+
 static bool
 conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
@@ -170,8 +218,13 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
                    !(info->invert_flags & XT_CONNTRACK_REPLDST))
                        return false;
 
-       if (!ct_proto_port_check(info, ct))
-               return false;
+       if (par->match->revision != 3) {
+               if (!ct_proto_port_check(info, ct))
+                       return false;
+       } else {
+               if (!ct_proto_port_check_v3(par->matchinfo, ct))
+                       return false;
+       }
 
        if ((info->match_flags & XT_CONNTRACK_STATUS) &&
            (!!(status_mask & ct->status) ^
@@ -207,6 +260,14 @@ conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
        return conntrack_mt(skb, par, info->state_mask, info->status_mask);
 }
 
+static bool
+conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       const struct xt_conntrack_mtinfo3 *info = par->matchinfo;
+
+       return conntrack_mt(skb, par, info->state_mask, info->status_mask);
+}
+
 static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
        int ret;
@@ -244,6 +305,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = {
                .destroy    = conntrack_mt_destroy,
                .me         = THIS_MODULE,
        },
+       {
+               .name       = "conntrack",
+               .revision   = 3,
+               .family     = NFPROTO_UNSPEC,
+               .matchsize  = sizeof(struct xt_conntrack_mtinfo3),
+               .match      = conntrack_mt_v3,
+               .checkentry = conntrack_mt_check,
+               .destroy    = conntrack_mt_destroy,
+               .me         = THIS_MODULE,
+       },
 };
 
 static int __init conntrack_mt_init(void)
index b39db8a..c7a2e54 100644 (file)
@@ -22,6 +22,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
 MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
 
 static int cpu_mt_check(const struct xt_mtchk_param *par)
 {
index 9127a3d..bb10b07 100644 (file)
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
        /*
         * Check if the packet belongs to an existing entry
         */
-       cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+       cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
        if (unlikely(cp == NULL)) {
                match = false;
                goto out;
index 91cb1d7..c60649e 100644 (file)
@@ -164,7 +164,6 @@ struct packet_mreq_max {
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
                int closing, int tx_ring);
 
-#define PGV_FROM_VMALLOC 1
 struct pgv {
        char *buffer;
 };
@@ -523,11 +522,11 @@ static inline unsigned int run_filter(const struct sk_buff *skb,
 {
        struct sk_filter *filter;
 
-       rcu_read_lock_bh();
-       filter = rcu_dereference_bh(sk->sk_filter);
+       rcu_read_lock();
+       filter = rcu_dereference(sk->sk_filter);
        if (filter != NULL)
                res = sk_run_filter(skb, filter->insns);
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
 
        return res;
 }
index 9542449..da8adac 100644 (file)
@@ -50,7 +50,6 @@ rdsdebug(char *fmt, ...)
 #define RDS_FRAG_SIZE  ((unsigned int)(1 << RDS_FRAG_SHIFT))
 
 #define RDS_CONG_MAP_BYTES     (65536 / 8)
-#define RDS_CONG_MAP_LONGS     (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
 #define RDS_CONG_MAP_PAGES     (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
 #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
 
index f04d4a4..e318f45 100644 (file)
@@ -205,6 +205,18 @@ config NET_SCH_DRR
 
          If unsure, say N.
 
+config NET_SCH_MQPRIO
+       tristate "Multi-queue priority scheduler (MQPRIO)"
+       help
+         Say Y here if you want to use the Multi-queue Priority scheduler.
+         This scheduler allows QOS to be offloaded on NICs that have support
+         for offloading QOS schedulers.
+
+         To compile this driver as a module, choose M here: the module will
+         be called sch_mqprio.
+
+         If unsure, say N.
+
 config NET_SCH_INGRESS
        tristate "Ingress Qdisc"
        depends on NET_CLS_ACT
@@ -243,7 +255,7 @@ config NET_CLS_TCINDEX
 
 config NET_CLS_ROUTE4
        tristate "Routing decision (ROUTE)"
-       select NET_CLS_ROUTE
+       select IP_ROUTE_CLASSID
        select NET_CLS
        ---help---
          If you say Y here, you will be able to classify packets
@@ -252,9 +264,6 @@ config NET_CLS_ROUTE4
          To compile this code as a module, choose M here: the
          module will be called cls_route.
 
-config NET_CLS_ROUTE
-       bool
-
 config NET_CLS_FW
        tristate "Netfilter mark (FW)"
        select NET_CLS
index 960f5db..26ce681 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)  += sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)      += sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)    += sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)      += sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO)   += sch_mqprio.o
 obj-$(CONFIG_NET_CLS_U32)      += cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)   += cls_route.o
 obj-$(CONFIG_NET_CLS_FW)       += cls_fw.o
index 23b25f8..15873e1 100644 (file)
@@ -78,7 +78,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
                           struct tc_action *a, struct tcf_hashinfo *hinfo)
 {
        struct tcf_common *p;
-       int err = 0, index = -1,i = 0, s_i = 0, n_i = 0;
+       int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
        struct nlattr *nest;
 
        read_lock_bh(hinfo->lock);
@@ -126,7 +126,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
 {
        struct tcf_common *p, *s_p;
        struct nlattr *nest;
-       int i= 0, n_i = 0;
+       int i = 0, n_i = 0;
 
        nest = nla_nest_start(skb, a->order);
        if (nest == NULL)
@@ -138,7 +138,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
                while (p != NULL) {
                        s_p = p->tcfc_next;
                        if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
-                                module_put(a->ops->owner);
+                               module_put(a->ops->owner);
                        n_i++;
                        p = s_p;
                }
@@ -447,7 +447,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (nest == NULL)
                goto nla_put_failure;
-       if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) {
+       err = tcf_action_dump_old(skb, a, bind, ref);
+       if (err > 0) {
                nla_nest_end(skb, nest);
                return err;
        }
@@ -491,7 +492,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
        struct tc_action *a;
        struct tc_action_ops *a_o;
        char act_name[IFNAMSIZ];
-       struct nlattr *tb[TCA_ACT_MAX+1];
+       struct nlattr *tb[TCA_ACT_MAX + 1];
        struct nlattr *kind;
        int err;
 
@@ -549,9 +550,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
                goto err_free;
 
        /* module count goes up only when brand new policy is created
-          if it exists and is only bound to in a_o->init() then
-          ACT_P_CREATED is not returned (a zero is).
-       */
+        * if it exists and is only bound to in a_o->init() then
+        * ACT_P_CREATED is not returned (a zero is).
+        */
        if (err != ACT_P_CREATED)
                module_put(a_o->owner);
        a->ops = a_o;
@@ -569,7 +570,7 @@ err_out:
 struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
                                  char *name, int ovr, int bind)
 {
-       struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+       struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *head = NULL, *act, *act_prev = NULL;
        int err;
        int i;
@@ -697,7 +698,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
 static struct tc_action *
 tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 {
-       struct nlattr *tb[TCA_ACT_MAX+1];
+       struct nlattr *tb[TCA_ACT_MAX + 1];
        struct tc_action *a;
        int index;
        int err;
@@ -770,7 +771,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
        struct tcamsg *t;
        struct netlink_callback dcb;
        struct nlattr *nest;
-       struct nlattr *tb[TCA_ACT_MAX+1];
+       struct nlattr *tb[TCA_ACT_MAX + 1];
        struct nlattr *kind;
        struct tc_action *a = create_a(0);
        int err = -ENOMEM;
@@ -821,7 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
        nlh->nlmsg_flags |= NLM_F_ROOT;
        module_put(a->ops->owner);
        kfree(a);
-       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+                            n->nlmsg_flags & NLM_F_ECHO);
        if (err > 0)
                return 0;
 
@@ -842,14 +844,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
              u32 pid, int event)
 {
        int i, ret;
-       struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
+       struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct tc_action *head = NULL, *act, *act_prev = NULL;
 
        ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
        if (ret < 0)
                return ret;
 
-       if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
+       if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
                if (tb[1] != NULL)
                        return tca_action_flush(net, tb[1], n, pid);
                else
@@ -892,7 +894,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
                /* now do the delete */
                tcf_action_destroy(head, 0);
                ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
-                                    n->nlmsg_flags&NLM_F_ECHO);
+                                    n->nlmsg_flags & NLM_F_ECHO);
                if (ret > 0)
                        return 0;
                return ret;
@@ -936,7 +938,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
        NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
-       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
        if (err > 0)
                err = 0;
        return err;
@@ -967,7 +969,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
 
        /* dump then free all the actions after update; inserted policy
         * stays intact
-        * */
+        */
        ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
        for (a = act; a; a = act) {
                act = a->next;
@@ -993,8 +995,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                return -EINVAL;
        }
 
-       /* n->nlmsg_flags&NLM_F_CREATE
-        * */
+       /* n->nlmsg_flags & NLM_F_CREATE */
        switch (n->nlmsg_type) {
        case RTM_NEWACTION:
                /* we are going to assume all other flags
@@ -1003,7 +1004,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                 * but since we want avoid ambiguity (eg when flags
                 * is zero) then just set this
                 */
-               if (n->nlmsg_flags&NLM_F_REPLACE)
+               if (n->nlmsg_flags & NLM_F_REPLACE)
                        ovr = 1;
 replay:
                ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1029,7 @@ replay:
 static struct nlattr *
 find_dump_kind(const struct nlmsghdr *n)
 {
-       struct nlattr *tb1, *tb2[TCA_ACT_MAX+1];
+       struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
        struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
        struct nlattr *nla[TCAA_MAX + 1];
        struct nlattr *kind;
@@ -1071,9 +1072,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
        }
 
        a_o = tc_lookup_action(kind);
-       if (a_o == NULL) {
+       if (a_o == NULL)
                return 0;
-       }
 
        memset(&a, 0, sizeof(struct tc_action));
        a.ops = a_o;
index 83ddfc0..6cdf9ab 100644 (file)
@@ -63,7 +63,7 @@ static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
        if (nla == NULL)
                return -EINVAL;
 
-       err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy);
+       err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
        if (err < 0)
                return err;
 
index c2ed90a..2b4ab4b 100644 (file)
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
 }
 
 typedef int (*g_rand)(struct tcf_gact *gact);
-static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
+static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
 #endif /* CONFIG_GACT_PROB */
 
 static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
                pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
                                     bind, &gact_idx_gen, &gact_hash_info);
                if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                ret = ACT_P_CREATED;
        } else {
                if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
 static int __init gact_init_module(void)
 {
 #ifdef CONFIG_GACT_PROB
-       printk(KERN_INFO "GACT probability on\n");
+       pr_info("GACT probability on\n");
 #else
-       printk(KERN_INFO "GACT probability NOT on\n");
+       pr_info("GACT probability NOT on\n");
 #endif
        return tcf_register_action(&act_gact_ops);
 }
index c2a7c20..9fc211a 100644 (file)
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
                pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
                                     &ipt_idx_gen, &ipt_hash_info);
                if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                ret = ACT_P_CREATED;
        } else {
                if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
        if (unlikely(!t))
                goto err2;
 
-       if ((err = ipt_init_target(t, tname, hook)) < 0)
+       err = ipt_init_target(t, tname, hook);
+       if (err < 0)
                goto err3;
 
        spin_lock_bh(&ipt->tcf_lock);
@@ -212,8 +213,9 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
        bstats_update(&ipt->tcf_bstats, skb);
 
        /* yes, we have to worry about both in and out dev
-        worry later - danger - this API seems to have changed
-        from earlier kernels */
+        * worry later - danger - this API seems to have changed
+        * from earlier kernels
+        */
        par.in       = skb->dev;
        par.out      = NULL;
        par.hooknum  = ipt->tcfi_hook;
@@ -253,9 +255,9 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
        struct tc_cnt c;
 
        /* for simple targets kernel size == user size
-       ** user name = target name
-       ** for foolproof you need to not assume this
-       */
+        * user name = target name
+        * for foolproof you need to not assume this
+        */
 
        t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
        if (unlikely(!t))
index d765067..961386e 100644 (file)
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
        .lock   =       &mirred_lock,
 };
 
-static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
+static int tcf_mirred_release(struct tcf_mirred *m, int bind)
 {
        if (m) {
                if (bind)
                        m->tcf_bindcnt--;
                m->tcf_refcnt--;
-               if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
+               if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
                        list_del(&m->tcfm_list);
                        if (m->tcfm_dev)
                                dev_put(m->tcfm_dev);
index 178a4bd..762b027 100644 (file)
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
                pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
                                     &nat_idx_gen, &nat_hash_info);
                if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                p = to_tcf_nat(pc);
                ret = ACT_P_CREATED;
        } else {
index 445bef7..50c7c06 100644 (file)
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
                pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
                                     &pedit_idx_gen, &pedit_hash_info);
                if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
                p = to_pedit(pc);
                keys = kmalloc(ksize, GFP_KERNEL);
                if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
        int i, munged = 0;
        unsigned int off;
 
-       if (skb_cloned(skb)) {
-               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
-                       return p->tcf_action;
-               }
-       }
+       if (skb_cloned(skb) &&
+           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+               return p->tcf_action;
 
        off = skb_network_offset(skb);
 
index e2f08b1..8a16307 100644 (file)
@@ -22,8 +22,8 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-#define L2T(p,L)   qdisc_l2t((p)->tcfp_R_tab, L)
-#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L)
+#define L2T(p, L)   qdisc_l2t((p)->tcfp_R_tab, L)
+#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
 
 #define POL_TAB_MASK     15
 static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
 };
 
 /* old policer structure from before tc actions */
-struct tc_police_compat
-{
+struct tc_police_compat {
        u32                     index;
        int                     action;
        u32                     limit;
@@ -139,7 +138,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
                                 struct tc_action *a, int ovr, int bind)
 {
-       unsigned h;
+       unsigned int h;
        int ret = 0, err;
        struct nlattr *tb[TCA_POLICE_MAX + 1];
        struct tc_police *parm;
index 7287cff..a34a22d 100644 (file)
@@ -47,7 +47,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
        /* print policy string followed by _ then packet count
         * Example if this was the 3rd packet and the string was "hello"
         * then it would look like "hello_3" (without quotes)
-        **/
+        */
        pr_info("simple: %s_%d\n",
               (char *)d->tcfd_defdata, d->tcf_bstats.packets);
        spin_unlock(&d->tcf_lock);
@@ -125,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
                pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
                                     &simp_idx_gen, &simp_hash_info);
                if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
 
                d = to_defact(pc);
                ret = alloc_defdata(d, defdata);
@@ -149,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
        return ret;
 }
 
-static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
+static int tcf_simp_cleanup(struct tc_action *a, int bind)
 {
        struct tcf_defact *d = a->priv;
 
@@ -158,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
        return 0;
 }
 
-static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
-                               int bind, int ref)
+static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+                        int bind, int ref)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tcf_defact *d = a->priv;
index 836f5fe..5f6f0c7 100644 (file)
@@ -113,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
                pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
                                     &skbedit_idx_gen, &skbedit_hash_info);
                if (IS_ERR(pc))
-                   return PTR_ERR(pc);
+                       return PTR_ERR(pc);
 
                d = to_skbedit(pc);
                ret = ACT_P_CREATED;
@@ -144,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
        return ret;
 }
 
-static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
 {
        struct tcf_skbedit *d = a->priv;
 
@@ -153,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
        return 0;
 }
 
-static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
-                               int bind, int ref)
+static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+                           int bind, int ref)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tcf_skbedit *d = a->priv;
index 5fd0c28..bb2c523 100644 (file)
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
        int rc = -ENOENT;
 
        write_lock(&cls_mod_lock);
-       for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next)
+       for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
                if (t == ops)
                        break;
 
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
        u32 first = TC_H_MAKE(0xC0000000U, 0U);
 
        if (tp)
-               first = tp->prio-1;
+               first = tp->prio - 1;
 
        return first;
 }
@@ -149,7 +149,8 @@ replay:
 
        if (prio == 0) {
                /* If no priority is given, user wants we allocated it. */
-               if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+               if (n->nlmsg_type != RTM_NEWTFILTER ||
+                   !(n->nlmsg_flags & NLM_F_CREATE))
                        return -ENOENT;
                prio = TC_H_MAKE(0x80000000U, 0U);
        }
@@ -176,7 +177,8 @@ replay:
        }
 
        /* Is it classful? */
-       if ((cops = q->ops->cl_ops) == NULL)
+       cops = q->ops->cl_ops;
+       if (!cops)
                return -EINVAL;
 
        if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
                goto errout;
 
        /* Check the chain for existence of proto-tcf with this priority */
-       for (back = chain; (tp=*back) != NULL; back = &tp->next) {
+       for (back = chain; (tp = *back) != NULL; back = &tp->next) {
                if (tp->prio >= prio) {
                        if (tp->prio == prio) {
-                               if (!nprio || (tp->protocol != protocol && protocol))
+                               if (!nprio ||
+                                   (tp->protocol != protocol && protocol))
                                        goto errout;
                        } else
                                tp = NULL;
@@ -216,7 +219,8 @@ replay:
                        goto errout;
 
                err = -ENOENT;
-               if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE))
+               if (n->nlmsg_type != RTM_NEWTFILTER ||
+                   !(n->nlmsg_flags & NLM_F_CREATE))
                        goto errout;
 
 
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                return skb->len;
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                return skb->len;
 
        if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
        if (!q)
                goto out;
-       if ((cops = q->ops->cl_ops) == NULL)
+       cops = q->ops->cl_ops;
+       if (!cops)
                goto errout;
        if (cops->tcf_chain == NULL)
                goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 
        s_t = cb->args[0];
 
-       for (tp=*chain, t=0; tp; tp = tp->next, t++) {
-               if (t < s_t) continue;
+       for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
+               if (t < s_t)
+                       continue;
                if (TC_H_MAJ(tcm->tcm_info) &&
                    TC_H_MAJ(tcm->tcm_info) != tp->prio)
                        continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
                arg.skb = skb;
                arg.cb = cb;
                arg.w.stop = 0;
-               arg.w.skip = cb->args[1]-1;
+               arg.w.skip = cb->args[1] - 1;
                arg.w.count = 0;
                tp->ops->walk(tp, &arg.w);
-               cb->args[1] = arg.w.count+1;
+               cb->args[1] = arg.w.count + 1;
                if (arg.w.stop)
                        break;
        }
index f23d915..8be8872 100644 (file)
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
-struct basic_head
-{
+struct basic_head {
        u32                     hgenerator;
        struct list_head        flist;
 };
 
-struct basic_filter
-{
+struct basic_filter {
        u32                     handle;
        struct tcf_exts         exts;
        struct tcf_ematch_tree  ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
        return 0;
 }
 
-static inline void basic_delete_filter(struct tcf_proto *tp,
-                                      struct basic_filter *f)
+static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
 {
        tcf_unbind_filter(tp, &f->res);
        tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
        [TCA_BASIC_EMATCHES]    = { .type = NLA_NESTED },
 };
 
-static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
-                                 unsigned long base, struct nlattr **tb,
-                                 struct nlattr *est)
+static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
+                          unsigned long base, struct nlattr **tb,
+                          struct nlattr *est)
 {
        int err = -EINVAL;
        struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
                } while (--i > 0 && basic_get(tp, head->hgenerator));
 
                if (i <= 0) {
-                       printk(KERN_ERR "Insufficient number of handles\n");
+                       pr_err("Insufficient number of handles\n");
                        goto errout;
                }
 
index d49c40f..32a3351 100644 (file)
@@ -56,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
 {
        struct cgroup_cls_state *cs;
 
-       if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
+       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+       if (!cs)
                return ERR_PTR(-ENOMEM);
 
        if (cgrp->parent)
@@ -94,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
        return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
 }
 
-struct cls_cgroup_head
-{
+struct cls_cgroup_head {
        u32                     handle;
        struct tcf_exts         exts;
        struct tcf_ematch_tree  ematches;
@@ -166,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
                             u32 handle, struct nlattr **tca,
                             unsigned long *arg)
 {
-       struct nlattr *tb[TCA_CGROUP_MAX+1];
+       struct nlattr *tb[TCA_CGROUP_MAX + 1];
        struct cls_cgroup_head *head = tp->root;
        struct tcf_ematch_tree t;
        struct tcf_exts e;
index 5b271a1..8ec0139 100644 (file)
@@ -121,7 +121,7 @@ static u32 flow_get_proto_src(struct sk_buff *skb)
                if (!pskb_network_may_pull(skb, sizeof(*iph)))
                        break;
                iph = ip_hdr(skb);
-               if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+               if (iph->frag_off & htons(IP_MF | IP_OFFSET))
                        break;
                poff = proto_ports_offset(iph->protocol);
                if (poff >= 0 &&
@@ -163,7 +163,7 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
                if (!pskb_network_may_pull(skb, sizeof(*iph)))
                        break;
                iph = ip_hdr(skb);
-               if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+               if (iph->frag_off & htons(IP_MF | IP_OFFSET))
                        break;
                poff = proto_ports_offset(iph->protocol);
                if (poff >= 0 &&
@@ -276,7 +276,7 @@ fallback:
 
 static u32 flow_get_rtclassid(const struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
        if (skb_dst(skb))
                return skb_dst(skb)->tclassid;
 #endif
index 93b0a7b..26e7bc4 100644 (file)
 
 #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
 
-struct fw_head
-{
+struct fw_head {
        struct fw_filter *ht[HTSIZE];
        u32 mask;
 };
 
-struct fw_filter
-{
+struct fw_filter {
        struct fw_filter        *next;
        u32                     id;
        struct tcf_result       res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
        .police = TCA_FW_POLICE
 };
 
-static __inline__ int fw_hash(u32 handle)
+static inline int fw_hash(u32 handle)
 {
        if (HTSIZE == 4096)
                return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
 static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
                          struct tcf_result *res)
 {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
        struct fw_filter *f;
        int r;
        u32 id = skb->mark;
 
        if (head != NULL) {
                id &= head->mask;
-               for (f=head->ht[fw_hash(id)]; f; f=f->next) {
+               for (f = head->ht[fw_hash(id)]; f; f = f->next) {
                        if (f->id == id) {
                                *res = f->res;
 #ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
                }
        } else {
                /* old method */
-               if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) {
+               if (id && (TC_H_MAJ(id) == 0 ||
+                          !(TC_H_MAJ(id ^ tp->q->handle)))) {
                        res->classid = id;
                        res->class = 0;
                        return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
 
 static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
 {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
        struct fw_filter *f;
 
        if (head == NULL)
                return 0;
 
-       for (f=head->ht[fw_hash(handle)]; f; f=f->next) {
+       for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
                if (f->id == handle)
                        return (unsigned long)f;
        }
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
        return 0;
 }
 
-static inline void
-fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
+static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
 {
        tcf_unbind_filter(tp, &f->res);
        tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
        if (head == NULL)
                return;
 
-       for (h=0; h<HTSIZE; h++) {
-               while ((f=head->ht[h]) != NULL) {
+       for (h = 0; h < HTSIZE; h++) {
+               while ((f = head->ht[h]) != NULL) {
                        head->ht[h] = f->next;
                        fw_delete_filter(tp, f);
                }
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
 
 static int fw_delete(struct tcf_proto *tp, unsigned long arg)
 {
-       struct fw_head *head = (struct fw_head*)tp->root;
-       struct fw_filter *f = (struct fw_filter*)arg;
+       struct fw_head *head = (struct fw_head *)tp->root;
+       struct fw_filter *f = (struct fw_filter *)arg;
        struct fw_filter **fp;
 
        if (head == NULL || f == NULL)
                goto out;
 
-       for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
+       for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
                if (*fp == f) {
                        tcf_tree_lock(tp);
                        *fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
                     struct nlattr **tca,
                     unsigned long *arg)
 {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
        struct fw_filter *f = (struct fw_filter *) *arg;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
 
 static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
-       struct fw_head *head = (struct fw_head*)tp->root;
+       struct fw_head *head = (struct fw_head *)tp->root;
        int h;
 
        if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
                   struct sk_buff *skb, struct tcmsg *t)
 {
        struct fw_head *head = (struct fw_head *)tp->root;
-       struct fw_filter *f = (struct fw_filter*)fh;
+       struct fw_filter *f = (struct fw_filter *)fh;
        unsigned char *b = skb_tail_pointer(skb);
        struct nlattr *nest;
 
index 694dcd8..d580cdf 100644 (file)
 #include <net/pkt_cls.h>
 
 /*
  1. For now we assume that route tags < 256.
     It allows to use direct table lookups, instead of hash tables.
  2. For now we assume that "from TAG" and "fromdev DEV" statements
     are mutually  exclusive.
  3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
* 1. For now we assume that route tags < 256.
*    It allows to use direct table lookups, instead of hash tables.
* 2. For now we assume that "from TAG" and "fromdev DEV" statements
*    are mutually  exclusive.
* 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
  */
 
-struct route4_fastmap
-{
+struct route4_fastmap {
        struct route4_filter    *filter;
        u32                     id;
        int                     iif;
 };
 
-struct route4_head
-{
+struct route4_head {
        struct route4_fastmap   fastmap[16];
-       struct route4_bucket    *table[256+1];
+       struct route4_bucket    *table[256 + 1];
 };
 
-struct route4_bucket
-{
+struct route4_bucket {
        /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
-       struct route4_filter    *ht[16+16+1];
+       struct route4_filter    *ht[16 + 16 + 1];
 };
 
-struct route4_filter
-{
+struct route4_filter {
        struct route4_filter    *next;
        u32                     id;
        int                     iif;
@@ -61,20 +57,20 @@ struct route4_filter
        struct route4_bucket    *bkt;
 };
 
-#define ROUTE4_FAILURE ((struct route4_filter*)(-1L))
+#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
 
 static const struct tcf_ext_map route_ext_map = {
        .police = TCA_ROUTE4_POLICE,
        .action = TCA_ROUTE4_ACT
 };
 
-static __inline__ int route4_fastmap_hash(u32 id, int iif)
+static inline int route4_fastmap_hash(u32 id, int iif)
 {
-       return id&0xF;
+       return id & 0xF;
 }
 
-static inline
-void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
+static void
+route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
 {
        spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
 
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
        spin_unlock_bh(root_lock);
 }
 
-static inline void
+static void
 route4_set_fastmap(struct route4_head *head, u32 id, int iif,
                   struct route4_filter *f)
 {
        int h = route4_fastmap_hash(id, iif);
+
        head->fastmap[h].id = id;
        head->fastmap[h].iif = iif;
        head->fastmap[h].filter = f;
 }
 
-static __inline__ int route4_hash_to(u32 id)
+static inline int route4_hash_to(u32 id)
 {
-       return id&0xFF;
+       return id & 0xFF;
 }
 
-static __inline__ int route4_hash_from(u32 id)
+static inline int route4_hash_from(u32 id)
 {
-       return (id>>16)&0xF;
+       return (id >> 16) & 0xF;
 }
 
-static __inline__ int route4_hash_iif(int iif)
+static inline int route4_hash_iif(int iif)
 {
-       return 16 + ((iif>>16)&0xF);
+       return 16 + ((iif >> 16) & 0xF);
 }
 
-static __inline__ int route4_hash_wild(void)
+static inline int route4_hash_wild(void)
 {
        return 32;
 }
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
 static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
                           struct tcf_result *res)
 {
-       struct route4_head *head = (struct route4_head*)tp->root;
+       struct route4_head *head = (struct route4_head *)tp->root;
        struct dst_entry *dst;
        struct route4_bucket *b;
        struct route4_filter *f;
        u32 id, h;
        int iif, dont_cache = 0;
 
-       if ((dst = skb_dst(skb)) == NULL)
+       dst = skb_dst(skb);
+       if (!dst)
                goto failure;
 
        id = dst->tclassid;
        if (head == NULL)
                goto old_method;
 
-       iif = ((struct rtable*)dst)->fl.iif;
+       iif = ((struct rtable *)dst)->fl.iif;
 
        h = route4_fastmap_hash(id, iif);
        if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
        h = route4_hash_to(id);
 
 restart:
-       if ((b = head->table[h]) != NULL) {
+       b = head->table[h];
+       if (b) {
                for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
                        if (f->id == id)
                                ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
 
 static inline u32 to_hash(u32 id)
 {
-       u32 h = id&0xFF;
-       if (id&0x8000)
+       u32 h = id & 0xFF;
+
+       if (id & 0x8000)
                h += 256;
        return h;
 }
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
        if (!(id & 0x8000)) {
                if (id > 255)
                        return 256;
-               return id&0xF;
+               return id & 0xF;
        }
-       return 16 + (id&0xF);
+       return 16 + (id & 0xF);
 }
 
 static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
 {
-       struct route4_head *head = (struct route4_head*)tp->root;
+       struct route4_head *head = (struct route4_head *)tp->root;
        struct route4_bucket *b;
        struct route4_filter *f;
-       unsigned h1, h2;
+       unsigned int h1, h2;
 
        if (!head)
                return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
        if (h1 > 256)
                return 0;
 
-       h2 = from_hash(handle>>16);
+       h2 = from_hash(handle >> 16);
        if (h2 > 32)
                return 0;
 
-       if ((b = head->table[h1]) != NULL) {
+       b = head->table[h1];
+       if (b) {
                for (f = b->ht[h2]; f; f = f->next)
                        if (f->handle == handle)
                                return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
        return 0;
 }
 
-static inline void
+static void
 route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
 {
        tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
        if (head == NULL)
                return;
 
-       for (h1=0; h1<=256; h1++) {
+       for (h1 = 0; h1 <= 256; h1++) {
                struct route4_bucket *b;
 
-               if ((b = head->table[h1]) != NULL) {
-                       for (h2=0; h2<=32; h2++) {
+               b = head->table[h1];
+               if (b) {
+                       for (h2 = 0; h2 <= 32; h2++) {
                                struct route4_filter *f;
 
                                while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
 
 static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 {
-       struct route4_head *head = (struct route4_head*)tp->root;
-       struct route4_filter **fp, *f = (struct route4_filter*)arg;
-       unsigned h = 0;
+       struct route4_head *head = (struct route4_head *)tp->root;
+       struct route4_filter **fp, *f = (struct route4_filter *)arg;
+       unsigned int h = 0;
        struct route4_bucket *b;
        int i;
 
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
        h = f->handle;
        b = f->bkt;
 
-       for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) {
+       for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
                if (*fp == f) {
                        tcf_tree_lock(tp);
                        *fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
 
                        /* Strip tree */
 
-                       for (i=0; i<=32; i++)
+                       for (i = 0; i <= 32; i++)
                                if (b->ht[i])
                                        return 0;
 
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
        }
 
        h1 = to_hash(nhandle);
-       if ((b = head->table[h1]) == NULL) {
+       b = head->table[h1];
+       if (!b) {
                err = -ENOBUFS;
                b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
                if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
                tcf_tree_unlock(tp);
        } else {
                unsigned int h2 = from_hash(nhandle >> 16);
+
                err = -EEXIST;
                for (fp = b->ht[h2]; fp; fp = fp->next)
                        if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
        if (err < 0)
                return err;
 
-       if ((f = (struct route4_filter*)*arg) != NULL) {
+       f = (struct route4_filter *)*arg;
+       if (f) {
                if (f->handle != handle && handle)
                        return -EINVAL;
 
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
 
 reinsert:
        h = from_hash(f->handle >> 16);
-       for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next)
+       for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
                if (f->handle < f1->handle)
                        break;
 
@@ -492,7 +497,8 @@ reinsert:
        if (old_handle && f->handle != old_handle) {
                th = to_hash(old_handle);
                h = from_hash(old_handle >> 16);
-               if ((b = head->table[th]) != NULL) {
+               b = head->table[th];
+               if (b) {
                        for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
                                if (*fp == f) {
                                        *fp = f->next;
@@ -515,7 +521,7 @@ errout:
 static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
        struct route4_head *head = tp->root;
-       unsigned h, h1;
+       unsigned int h, h1;
 
        if (head == NULL)
                arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int route4_dump(struct tcf_proto *tp, unsigned long fh,
                       struct sk_buff *skb, struct tcmsg *t)
 {
-       struct route4_filter *f = (struct route4_filter*)fh;
+       struct route4_filter *f = (struct route4_filter *)fh;
        unsigned char *b = skb_tail_pointer(skb);
        struct nlattr *nest;
        u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
        if (nest == NULL)
                goto nla_put_failure;
 
-       if (!(f->handle&0x8000)) {
-               id = f->id&0xFF;
+       if (!(f->handle & 0x8000)) {
+               id = f->id & 0xFF;
                NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
        }
-       if (f->handle&0x80000000) {
-               if ((f->handle>>16) != 0xFFFF)
+       if (f->handle & 0x80000000) {
+               if ((f->handle >> 16) != 0xFFFF)
                        NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
        } else {
-               id = f->id>>16;
+               id = f->id >> 16;
                NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
        }
        if (f->res.classid)
index 425a179..402c44b 100644 (file)
    powerful classification engine.  */
 
 
-struct rsvp_head
-{
+struct rsvp_head {
        u32                     tmap[256/32];
        u32                     hgenerator;
        u8                      tgenerator;
        struct rsvp_session     *ht[256];
 };
 
-struct rsvp_session
-{
+struct rsvp_session {
        struct rsvp_session     *next;
        __be32                  dst[RSVP_DST_LEN];
        struct tc_rsvp_gpi      dpi;
        u8                      protocol;
        u8                      tunnelid;
        /* 16 (src,sport) hash slots, and one wildcard source slot */
-       struct rsvp_filter      *ht[16+1];
+       struct rsvp_filter      *ht[16 + 1];
 };
 
 
-struct rsvp_filter
-{
+struct rsvp_filter {
        struct rsvp_filter      *next;
        __be32                  src[RSVP_DST_LEN];
        struct tc_rsvp_gpi      spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
        struct rsvp_session     *sess;
 };
 
-static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
+static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 {
-       unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
+       unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
+
        h ^= h>>16;
        h ^= h>>8;
        return (h ^ protocol ^ tunnelid) & 0xFF;
 }
 
-static __inline__ unsigned hash_src(__be32 *src)
+static inline unsigned int hash_src(__be32 *src)
 {
-       unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
+       unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
+
        h ^= h>>16;
        h ^= h>>8;
        h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
                         struct tcf_result *res)
 {
-       struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+       struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
        struct rsvp_session *s;
        struct rsvp_filter *f;
-       unsigned h1, h2;
+       unsigned int h1, h2;
        __be32 *dst, *src;
        u8 protocol;
        u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
        src = &nhptr->saddr.s6_addr32[0];
        dst = &nhptr->daddr.s6_addr32[0];
        protocol = nhptr->nexthdr;
-       xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
+       xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
 #else
        src = &nhptr->saddr;
        dst = &nhptr->daddr;
        protocol = nhptr->protocol;
-       xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
-       if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
+       xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
+       if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
                return -1;
 #endif
 
@@ -176,10 +175,10 @@ restart:
        h2 = hash_src(src);
 
        for (s = sht[h1]; s; s = s->next) {
-               if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
+               if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
                    protocol == s->protocol &&
                    !(s->dpi.mask &
-                     (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
+                     (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
 #if RSVP_DST_LEN == 4
                    dst[0] == s->dst[0] &&
                    dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
                    tunnelid == s->tunnelid) {
 
                        for (f = s->ht[h2]; f; f = f->next) {
-                               if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
-                                   !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
+                               if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
+                                   !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
 #if RSVP_DST_LEN == 4
                                    &&
                                    src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
                                                return 0;
 
                                        tunnelid = f->res.classid;
-                                       nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
+                                       nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
                                        goto restart;
                                }
                        }
@@ -224,11 +223,11 @@ matched:
 
 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 {
-       struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
+       struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
        struct rsvp_session *s;
        struct rsvp_filter *f;
-       unsigned h1 = handle&0xFF;
-       unsigned h2 = (handle>>8)&0xFF;
+       unsigned int h1 = handle & 0xFF;
+       unsigned int h2 = (handle >> 8) & 0xFF;
 
        if (h2 > 16)
                return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
        return -ENOBUFS;
 }
 
-static inline void
+static void
 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 {
        tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
 
        sht = data->ht;
 
-       for (h1=0; h1<256; h1++) {
+       for (h1 = 0; h1 < 256; h1++) {
                struct rsvp_session *s;
 
                while ((s = sht[h1]) != NULL) {
                        sht[h1] = s->next;
 
-                       for (h2=0; h2<=16; h2++) {
+                       for (h2 = 0; h2 <= 16; h2++) {
                                struct rsvp_filter *f;
 
                                while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
 
 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 {
-       struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
-       unsigned h = f->handle;
+       struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
+       unsigned int h = f->handle;
        struct rsvp_session **sp;
        struct rsvp_session *s = f->sess;
        int i;
 
-       for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
+       for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
                if (*fp == f) {
                        tcf_tree_lock(tp);
                        *fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 
                        /* Strip tree */
 
-                       for (i=0; i<=16; i++)
+                       for (i = 0; i <= 16; i++)
                                if (s->ht[i])
                                        return 0;
 
                        /* OK, session has no flows */
-                       for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
+                       for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
                             *sp; sp = &(*sp)->next) {
                                if (*sp == s) {
                                        tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
        return 0;
 }
 
-static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
+static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
 {
        struct rsvp_head *data = tp->root;
        int i = 0xFFFF;
 
        while (i-- > 0) {
                u32 h;
+
                if ((data->hgenerator += 0x10000) == 0)
                        data->hgenerator = 0x10000;
                h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
 
 static int tunnel_bts(struct rsvp_head *data)
 {
-       int n = data->tgenerator>>5;
-       u32 b = 1<<(data->tgenerator&0x1F);
+       int n = data->tgenerator >> 5;
+       u32 b = 1 << (data->tgenerator & 0x1F);
 
-       if (data->tmap[n]&b)
+       if (data->tmap[n] & b)
                return 0;
        data->tmap[n] |= b;
        return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
 
        memset(tmap, 0, sizeof(tmap));
 
-       for (h1=0; h1<256; h1++) {
+       for (h1 = 0; h1 < 256; h1++) {
                struct rsvp_session *s;
                for (s = sht[h1]; s; s = s->next) {
-                       for (h2=0; h2<=16; h2++) {
+                       for (h2 = 0; h2 <= 16; h2++) {
                                struct rsvp_filter *f;
 
                                for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
 {
        int i, k;
 
-       for (k=0; k<2; k++) {
-               for (i=255; i>0; i--) {
+       for (k = 0; k < 2; k++) {
+               for (i = 255; i > 0; i--) {
                        if (++data->tgenerator == 0)
                                data->tgenerator = 1;
                        if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
        struct nlattr *opt = tca[TCA_OPTIONS-1];
        struct nlattr *tb[TCA_RSVP_MAX + 1];
        struct tcf_exts e;
-       unsigned h1, h2;
+       unsigned int h1, h2;
        __be32 *dst;
        int err;
 
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
        if (err < 0)
                return err;
 
-       if ((f = (struct rsvp_filter*)*arg) != NULL) {
+       f = (struct rsvp_filter *)*arg;
+       if (f) {
                /* Node exists: adjust only classid */
 
                if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
                        goto errout;
        }
 
-       for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
+       for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
                    pinfo && pinfo->protocol == s->protocol &&
                    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
                        tcf_exts_change(tp, &f->exts, &e);
 
                        for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
-                               if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
+                               if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
                                        break;
                        f->next = *fp;
                        wmb();
@@ -567,7 +568,7 @@ errout2:
 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 {
        struct rsvp_head *head = tp->root;
-       unsigned h, h1;
+       unsigned int h, h1;
 
        if (arg->stop)
                return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
                     struct sk_buff *skb, struct tcmsg *t)
 {
-       struct rsvp_filter *f = (struct rsvp_filter*)fh;
+       struct rsvp_filter *f = (struct rsvp_filter *)fh;
        struct rsvp_session *s;
        unsigned char *b = skb_tail_pointer(skb);
        struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
        NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
        if (f->res.classid)
                NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
-       if (((f->handle>>8)&0xFF) != 16)
+       if (((f->handle >> 8) & 0xFF) != 16)
                NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 
        if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
index 20ef330..36667fa 100644 (file)
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
                 * of the hashing index is below the threshold.
                 */
                if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
-                       cp.hash = (cp.mask >> cp.shift)+1;
+                       cp.hash = (cp.mask >> cp.shift) + 1;
                else
                        cp.hash = DEFAULT_HASH_SIZE;
        }
index b0c2a82..966920c 100644 (file)
@@ -42,8 +42,7 @@
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
 
-struct tc_u_knode
-{
+struct tc_u_knode {
        struct tc_u_knode       *next;
        u32                     handle;
        struct tc_u_hnode       *ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
        struct tc_u32_sel       sel;
 };
 
-struct tc_u_hnode
-{
+struct tc_u_hnode {
        struct tc_u_hnode       *next;
        u32                     handle;
        u32                     prio;
        struct tc_u_common      *tp_c;
        int                     refcnt;
-       unsigned                divisor;
+       unsigned int            divisor;
        struct tc_u_knode       *ht[1];
 };
 
-struct tc_u_common
-{
+struct tc_u_common {
        struct tc_u_hnode       *hlist;
        struct Qdisc            *q;
        int                     refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
        .police = TCA_U32_POLICE
 };
 
-static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift)
+static inline unsigned int u32_hash_fold(__be32 key,
+                                        const struct tc_u32_sel *sel,
+                                        u8 fshift)
 {
-       unsigned h = ntohl(key & sel->hmask)>>fshift;
+       unsigned int h = ntohl(key & sel->hmask) >> fshift;
 
        return h;
 }
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
                unsigned int      off;
        } stack[TC_U32_MAXDEPTH];
 
-       struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
+       struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
        unsigned int off = skb_network_offset(skb);
        struct tc_u_knode *n;
        int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
                struct tc_u32_key *key = n->sel.keys;
 
 #ifdef CONFIG_CLS_U32_PERF
-               n->pf->rcnt +=1;
+               n->pf->rcnt += 1;
                j = 0;
 #endif
 
@@ -133,7 +132,7 @@ next_knode:
                }
 #endif
 
-               for (i = n->sel.nkeys; i>0; i--, key++) {
+               for (i = n->sel.nkeys; i > 0; i--, key++) {
                        int toff = off + key->off + (off2 & key->offmask);
                        __be32 *data, _data;
 
@@ -148,13 +147,13 @@ next_knode:
                                goto next_knode;
                        }
 #ifdef CONFIG_CLS_U32_PERF
-                       n->pf->kcnts[j] +=1;
+                       n->pf->kcnts[j] += 1;
                        j++;
 #endif
                }
                if (n->ht_down == NULL) {
 check_terminal:
-                       if (n->sel.flags&TC_U32_TERMINAL) {
+                       if (n->sel.flags & TC_U32_TERMINAL) {
 
                                *res = n->res;
 #ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
                                }
 #endif
 #ifdef CONFIG_CLS_U32_PERF
-                               n->pf->rhit +=1;
+                               n->pf->rhit += 1;
 #endif
                                r = tcf_exts_exec(skb, &n->exts, res);
                                if (r < 0) {
@@ -197,10 +196,10 @@ check_terminal:
                        sel = ht->divisor & u32_hash_fold(*data, &n->sel,
                                                          n->fshift);
                }
-               if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
+               if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
                        goto next_ht;
 
-               if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
+               if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
                        off2 = n->sel.off + 3;
                        if (n->sel.flags & TC_U32_VAROFFSET) {
                                __be16 *data, _data;
@@ -215,7 +214,7 @@ check_terminal:
                        }
                        off2 &= ~3;
                }
-               if (n->sel.flags&TC_U32_EAT) {
+               if (n->sel.flags & TC_U32_EAT) {
                        off += off2;
                        off2 = 0;
                }
@@ -236,11 +235,11 @@ out:
 
 deadloop:
        if (net_ratelimit())
-               printk(KERN_WARNING "cls_u32: dead loop\n");
+               pr_warning("cls_u32: dead loop\n");
        return -1;
 }
 
-static __inline__ struct tc_u_hnode *
+static struct tc_u_hnode *
 u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 {
        struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
        return ht;
 }
 
-static __inline__ struct tc_u_knode *
+static struct tc_u_knode *
 u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
 {
-       unsigned sel;
+       unsigned int sel;
        struct tc_u_knode *n = NULL;
 
        sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
        do {
                if (++tp_c->hgenerator == 0x7FF)
                        tp_c->hgenerator = 1;
-       } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
+       } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
 
        return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
 }
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 {
        struct tc_u_knode *n;
-       unsigned h;
+       unsigned int h;
 
-       for (h=0; h<=ht->divisor; h++) {
+       for (h = 0; h <= ht->divisor; h++) {
                while ((n = ht->ht[h]) != NULL) {
                        ht->ht[h] = n->next;
 
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
 
 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 {
-       struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
+       struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
 
        if (ht == NULL)
                return 0;
 
        if (TC_U32_KEY(ht->handle))
-               return u32_delete_key(tp, (struct tc_u_knode*)ht);
+               return u32_delete_key(tp, (struct tc_u_knode *)ht);
 
        if (tp->root == ht)
                return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
 {
        struct tc_u_knode *n;
-       unsigned i = 0x7FF;
+       unsigned int i = 0x7FF;
 
-       for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
+       for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
                if (i < TC_U32_NODE(n->handle))
                        i = TC_U32_NODE(n->handle);
        i++;
 
-       return handle|(i>0xFFF ? 0xFFF : i);
+       return handle | (i > 0xFFF ? 0xFFF : i);
 }
 
 static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
        if (err < 0)
                return err;
 
-       if ((n = (struct tc_u_knode*)*arg) != NULL) {
+       n = (struct tc_u_knode *)*arg;
+       if (n) {
                if (TC_U32_KEY(n->handle) == 0)
                        return -EINVAL;
 
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
        }
 
        if (tb[TCA_U32_DIVISOR]) {
-               unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
+               unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
 
                if (--divisor > 0x100)
                        return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
                        if (handle == 0)
                                return -ENOMEM;
                }
-               ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
+               ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
                if (ht == NULL)
                        return -ENOBUFS;
                ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
        struct tc_u_common *tp_c = tp->data;
        struct tc_u_hnode *ht;
        struct tc_u_knode *n;
-       unsigned h;
+       unsigned int h;
 
        if (arg->stop)
                return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                     struct sk_buff *skb, struct tcmsg *t)
 {
-       struct tc_u_knode *n = (struct tc_u_knode*)fh;
+       struct tc_u_knode *n = (struct tc_u_knode *)fh;
        struct nlattr *nest;
 
        if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                goto nla_put_failure;
 
        if (TC_U32_KEY(n->handle) == 0) {
-               struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
-               u32 divisor = ht->divisor+1;
+               struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
+               u32 divisor = ht->divisor + 1;
+
                NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
        } else {
                NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
                        goto nla_put_failure;
 
 #ifdef CONFIG_NET_CLS_IND
-               if(strlen(n->indev))
+               if (strlen(n->indev))
                        NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
 #endif
 #ifdef CONFIG_CLS_U32_PERF
index bc45039..1c8360a 100644 (file)
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
                return 0;
 
        switch (cmp->align) {
-               case TCF_EM_ALIGN_U8:
-                       val = *ptr;
-                       break;
+       case TCF_EM_ALIGN_U8:
+               val = *ptr;
+               break;
 
-               case TCF_EM_ALIGN_U16:
-                       val = get_unaligned_be16(ptr);
+       case TCF_EM_ALIGN_U16:
+               val = get_unaligned_be16(ptr);
 
-                       if (cmp_needs_transformation(cmp))
-                               val = be16_to_cpu(val);
-                       break;
+               if (cmp_needs_transformation(cmp))
+                       val = be16_to_cpu(val);
+               break;
 
-               case TCF_EM_ALIGN_U32:
-                       /* Worth checking boundries? The branching seems
-                        * to get worse. Visit again. */
-                       val = get_unaligned_be32(ptr);
+       case TCF_EM_ALIGN_U32:
+               /* Worth checking boundries? The branching seems
+                * to get worse. Visit again.
+                */
+               val = get_unaligned_be32(ptr);
 
-                       if (cmp_needs_transformation(cmp))
-                               val = be32_to_cpu(val);
-                       break;
+               if (cmp_needs_transformation(cmp))
+                       val = be32_to_cpu(val);
+               break;
 
-               default:
-                       return 0;
+       default:
+               return 0;
        }
 
        if (cmp->mask)
                val &= cmp->mask;
 
        switch (cmp->opnd) {
-               case TCF_EM_OPND_EQ:
-                       return val == cmp->val;
-               case TCF_EM_OPND_LT:
-                       return val < cmp->val;
-               case TCF_EM_OPND_GT:
-                       return val > cmp->val;
+       case TCF_EM_OPND_EQ:
+               return val == cmp->val;
+       case TCF_EM_OPND_LT:
+               return val < cmp->val;
+       case TCF_EM_OPND_GT:
+               return val > cmp->val;
        }
 
        return 0;
index 34da5e2..a889d09 100644 (file)
 #include <net/pkt_cls.h>
 #include <net/sock.h>
 
-struct meta_obj
-{
+struct meta_obj {
        unsigned long           value;
        unsigned int            len;
 };
 
-struct meta_value
-{
+struct meta_value {
        struct tcf_meta_val     hdr;
        unsigned long           val;
        unsigned int            len;
 };
 
-struct meta_match
-{
+struct meta_match {
        struct meta_value       lvalue;
        struct meta_value       rvalue;
 };
@@ -255,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
        if (unlikely(skb_dst(skb) == NULL))
                *err = -1;
        else
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
                dst->value = skb_dst(skb)->tclassid;
 #else
                dst->value = 0;
@@ -483,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
  * Meta value collectors assignment table
  **************************************************************************/
 
-struct meta_ops
-{
+struct meta_ops {
        void            (*get)(struct sk_buff *, struct tcf_pkt_info *,
                               struct meta_value *, struct meta_obj *, int *);
 };
@@ -494,7 +490,7 @@ struct meta_ops
 
 /* Meta value operations table listing all meta value collectors and
  * assigns them to a type and meta id. */
-static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
+static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
        [TCF_META_TYPE_VAR] = {
                [META_ID(DEV)]                  = META_FUNC(var_dev),
                [META_ID(SK_BOUND_IF)]          = META_FUNC(var_sk_bound_if),
@@ -550,7 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
        }
 };
 
-static inline struct meta_ops * meta_ops(struct meta_value *val)
+static inline struct meta_ops *meta_ops(struct meta_value *val)
 {
        return &__meta_ops[meta_type(val)][meta_id(val)];
 }
@@ -649,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
 {
        if (v->len == sizeof(unsigned long))
                NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
-       else if (v->len == sizeof(u32)) {
+       else if (v->len == sizeof(u32))
                NLA_PUT_U32(skb, tlv, v->val);
-       }
 
        return 0;
 
@@ -663,8 +658,7 @@ nla_put_failure:
  * Type specific operations table
  **************************************************************************/
 
-struct meta_type_ops
-{
+struct meta_type_ops {
        void    (*destroy)(struct meta_value *);
        int     (*compare)(struct meta_obj *, struct meta_obj *);
        int     (*change)(struct meta_value *, struct nlattr *);
@@ -672,7 +666,7 @@ struct meta_type_ops
        int     (*dump)(struct sk_buff *, struct meta_value *, int);
 };
 
-static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
+static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
        [TCF_META_TYPE_VAR] = {
                .destroy = meta_var_destroy,
                .compare = meta_var_compare,
@@ -688,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
        }
 };
 
-static inline struct meta_type_ops * meta_type_ops(struct meta_value *v)
+static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
 {
        return &__meta_type_ops[meta_type(v)];
 }
@@ -713,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
                return err;
 
        if (meta_type_ops(v)->apply_extras)
-           meta_type_ops(v)->apply_extras(v, dst);
+               meta_type_ops(v)->apply_extras(v, dst);
 
        return 0;
 }
@@ -732,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
        r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
 
        switch (meta->lvalue.hdr.op) {
-               case TCF_EM_OPND_EQ:
-                       return !r;
-               case TCF_EM_OPND_LT:
-                       return r < 0;
-               case TCF_EM_OPND_GT:
-                       return r > 0;
+       case TCF_EM_OPND_EQ:
+               return !r;
+       case TCF_EM_OPND_LT:
+               return r < 0;
+       case TCF_EM_OPND_GT:
+               return r > 0;
        }
 
        return 0;
@@ -771,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
 
 static inline int meta_is_supported(struct meta_value *val)
 {
-       return (!meta_id(val) || meta_ops(val)->get);
+       return !meta_id(val) || meta_ops(val)->get;
 }
 
 static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
index 1a4176a..a3bed07 100644 (file)
@@ -18,8 +18,7 @@
 #include <linux/tc_ematch/tc_em_nbyte.h>
 #include <net/pkt_cls.h>
 
-struct nbyte_data
-{
+struct nbyte_data {
        struct tcf_em_nbyte     hdr;
        char                    pattern[0];
 };
index ea8f566..15d353d 100644 (file)
@@ -19,8 +19,7 @@
 #include <linux/tc_ematch/tc_em_text.h>
 #include <net/pkt_cls.h>
 
-struct text_match
-{
+struct text_match {
        u16                     from_offset;
        u16                     to_offset;
        u8                      from_layer;
index 953f147..797bdb8 100644 (file)
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
        if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
                return 0;
 
-       return !(((*(__be32*) ptr)  ^ key->val) & key->mask);
+       return !(((*(__be32 *) ptr)  ^ key->val) & key->mask);
 }
 
 static struct tcf_ematch_ops em_u32_ops = {
index 5e37da9..88d93eb 100644 (file)
@@ -93,7 +93,7 @@
 static LIST_HEAD(ematch_ops);
 static DEFINE_RWLOCK(ematch_mod_lock);
 
-static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
+static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
 {
        struct tcf_ematch_ops *e = NULL;
 
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
 }
 EXPORT_SYMBOL(tcf_em_unregister);
 
-static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
-                                                  int index)
+static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
+                                                 int index)
 {
        return &tree->matches[index];
 }
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 
        if (em_hdr->kind == TCF_EM_CONTAINER) {
                /* Special ematch called "container", carries an index
-                * referencing an external ematch sequence. */
+                * referencing an external ematch sequence.
+                */
                u32 ref;
 
                if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                        goto errout;
 
                /* We do not allow backward jumps to avoid loops and jumps
-                * to our own position are of course illegal. */
+                * to our own position are of course illegal.
+                */
                if (ref <= idx)
                        goto errout;
 
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                 * which automatically releases the reference again, therefore
                 * the module MUST not be given back under any circumstances
                 * here. Be aware, the destroy function assumes that the
-                * module is held if the ops field is non zero. */
+                * module is held if the ops field is non zero.
+                */
                em->ops = tcf_em_lookup(em_hdr->kind);
 
                if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                        if (em->ops) {
                                /* We dropped the RTNL mutex in order to
                                 * perform the module load. Tell the caller
-                                * to replay the request. */
+                                * to replay the request.
+                                */
                                module_put(em->ops->owner);
                                err = -EAGAIN;
                        }
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                }
 
                /* ematch module provides expected length of data, so we
-                * can do a basic sanity check. */
+                * can do a basic sanity check.
+                */
                if (em->ops->datalen && data_len < em->ops->datalen)
                        goto errout;
 
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
                         * TCF_EM_SIMPLE may be specified stating that the
                         * data only consists of a u32 integer and the module
                         * does not expected a memory reference but rather
-                        * the value carried. */
+                        * the value carried.
+                        */
                        if (em_hdr->flags & TCF_EM_SIMPLE) {
                                if (data_len < sizeof(u32))
                                        goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
         * The array of rt attributes is parsed in the order as they are
         * provided, their type must be incremental from 1 to n. Even
         * if it does not serve any real purpose, a failure of sticking
-        * to this policy will result in parsing failure. */
+        * to this policy will result in parsing failure.
+        */
        for (idx = 0; nla_ok(rt_match, list_len); idx++) {
                err = -EINVAL;
 
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
        /* Check if the number of matches provided by userspace actually
         * complies with the array of matches. The number was used for
         * the validation of references and a mismatch could lead to
-        * undefined references during the matching process. */
+        * undefined references during the matching process.
+        */
        if (idx != tree_hdr->nmatches) {
                err = -EINVAL;
                goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
                        .flags = em->flags
                };
 
-               NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
+               NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
 
                if (em->ops && em->ops->dump) {
                        if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
                               struct tcf_pkt_info *info)
 {
        int r = em->ops->match(skb, em, info);
+
        return tcf_em_is_inverted(em) ? !r : r;
 }
 
@@ -527,8 +536,8 @@ pop_stack:
 
 stack_overflow:
        if (net_ratelimit())
-               printk(KERN_WARNING "tc ematch: local stack overflow,"
-                       " increase NET_EMATCH_STACK\n");
+               pr_warning("tc ematch: local stack overflow,"
+                          " increase NET_EMATCH_STACK\n");
        return -1;
 }
 EXPORT_SYMBOL(__tcf_em_tree_match);
index b22ca2d..1507415 100644 (file)
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
        int err = -ENOENT;
 
        write_lock(&qdisc_mod_lock);
-       for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
+       for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
                if (q == qops)
                        break;
        if (q) {
@@ -321,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
        if (!tab || --tab->refcnt)
                return;
 
-       for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
+       for (rtabp = &qdisc_rtab_list;
+            (rtab = *rtabp) != NULL;
+            rtabp = &rtab->next) {
                if (rtab == tab) {
                        *rtabp = rtab->next;
                        kfree(rtab);
@@ -396,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
        return stab;
 }
 
+static void stab_kfree_rcu(struct rcu_head *head)
+{
+       kfree(container_of(head, struct qdisc_size_table, rcu));
+}
+
 void qdisc_put_stab(struct qdisc_size_table *tab)
 {
        if (!tab)
@@ -405,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
 
        if (--tab->refcnt == 0) {
                list_del(&tab->list);
-               kfree(tab);
+               call_rcu_bh(&tab->rcu, stab_kfree_rcu);
        }
 
        spin_unlock(&qdisc_stab_lock);
@@ -428,7 +435,7 @@ nla_put_failure:
        return -1;
 }
 
-void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
 {
        int pkt_len, slot;
 
@@ -454,14 +461,13 @@ out:
                pkt_len = 1;
        qdisc_skb_cb(skb)->pkt_len = pkt_len;
 }
-EXPORT_SYMBOL(qdisc_calculate_pkt_len);
+EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
 
 void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
 {
        if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
-               printk(KERN_WARNING
-                      "%s: %s qdisc %X: is non-work-conserving?\n",
-                      txt, qdisc->ops->id, qdisc->handle >> 16);
+               pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
+                       txt, qdisc->ops->id, qdisc->handle >> 16);
                qdisc->flags |= TCQ_F_WARN_NONWC;
        }
 }
@@ -472,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
        struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
                                                 timer);
 
-       wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(wd->qdisc);
        __netif_schedule(qdisc_root(wd->qdisc));
 
        return HRTIMER_NORESTART;
@@ -494,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
                     &qdisc_root_sleeping(wd->qdisc)->state))
                return;
 
-       wd->qdisc->flags |= TCQ_F_THROTTLED;
+       qdisc_throttled(wd->qdisc);
        time = ktime_set(0, 0);
        time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
        hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -504,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
 {
        hrtimer_cancel(&wd->timer);
-       wd->qdisc->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(wd->qdisc);
 }
 EXPORT_SYMBOL(qdisc_watchdog_cancel);
 
@@ -625,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
                        autohandle = TC_H_MAKE(0x80000000U, 0);
        } while (qdisc_lookup(dev, autohandle) && --i > 0);
 
-       return i>0 ? autohandle : 0;
+       return i > 0 ? autohandle : 0;
 }
 
 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -834,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
                                err = PTR_ERR(stab);
                                goto err_out4;
                        }
-                       sch->stab = stab;
+                       rcu_assign_pointer(sch->stab, stab);
                }
                if (tca[TCA_RATE]) {
                        spinlock_t *root_lock;
@@ -874,7 +880,7 @@ err_out4:
         * Any broken qdiscs that would require a ops->reset() here?
         * The qdisc was never in action so it shouldn't be necessary.
         */
-       qdisc_put_stab(sch->stab);
+       qdisc_put_stab(rtnl_dereference(sch->stab));
        if (ops->destroy)
                ops->destroy(sch);
        goto err_out3;
@@ -882,7 +888,7 @@ err_out4:
 
 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
 {
-       struct qdisc_size_table *stab = NULL;
+       struct qdisc_size_table *ostab, *stab = NULL;
        int err = 0;
 
        if (tca[TCA_OPTIONS]) {
@@ -899,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
                        return PTR_ERR(stab);
        }
 
-       qdisc_put_stab(sch->stab);
-       sch->stab = stab;
+       ostab = rtnl_dereference(sch->stab);
+       rcu_assign_pointer(sch->stab, stab);
+       qdisc_put_stab(ostab);
 
        if (tca[TCA_RATE]) {
                /* NB: ignores errors from replace_estimator
@@ -915,9 +922,8 @@ out:
        return 0;
 }
 
-struct check_loop_arg
-{
-       struct qdisc_walker     w;
+struct check_loop_arg {
+       struct qdisc_walker     w;
        struct Qdisc            *p;
        int                     depth;
 };
@@ -970,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        struct Qdisc *p = NULL;
        int err;
 
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                return -ENODEV;
 
        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -980,12 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        if (clid) {
                if (clid != TC_H_ROOT) {
                        if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
-                               if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+                               p = qdisc_lookup(dev, TC_H_MAJ(clid));
+                               if (!p)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
-                       } else { /* ingress */
-                               if (dev_ingress_queue(dev))
-                                       q = dev_ingress_queue(dev)->qdisc_sleeping;
+                       } else if (dev_ingress_queue(dev)) {
+                               q = dev_ingress_queue(dev)->qdisc_sleeping;
                        }
                } else {
                        q = dev->qdisc;
@@ -996,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
                        return -EINVAL;
        } else {
-               if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+               q = qdisc_lookup(dev, tcm->tcm_handle);
+               if (!q)
                        return -ENOENT;
        }
 
@@ -1008,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                        return -EINVAL;
                if (q->handle == 0)
                        return -ENOENT;
-               if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
+               err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
+               if (err != 0)
                        return err;
        } else {
                qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1017,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 }
 
 /*
  Create/change qdisc.
* Create/change qdisc.
  */
 
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1036,7 +1045,8 @@ replay:
        clid = tcm->tcm_parent;
        q = p = NULL;
 
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                return -ENODEV;
 
        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1046,12 +1056,12 @@ replay:
        if (clid) {
                if (clid != TC_H_ROOT) {
                        if (clid != TC_H_INGRESS) {
-                               if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
+                               p = qdisc_lookup(dev, TC_H_MAJ(clid));
+                               if (!p)
                                        return -ENOENT;
                                q = qdisc_leaf(p, clid);
-                       } else { /* ingress */
-                               if (dev_ingress_queue_create(dev))
-                                       q = dev_ingress_queue(dev)->qdisc_sleeping;
+                       } else if (dev_ingress_queue_create(dev)) {
+                               q = dev_ingress_queue(dev)->qdisc_sleeping;
                        }
                } else {
                        q = dev->qdisc;
@@ -1063,13 +1073,14 @@ replay:
 
                if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
                        if (tcm->tcm_handle) {
-                               if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
+                               if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
                                        return -EEXIST;
                                if (TC_H_MIN(tcm->tcm_handle))
                                        return -EINVAL;
-                               if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
+                               q = qdisc_lookup(dev, tcm->tcm_handle);
+                               if (!q)
                                        goto create_n_graft;
-                               if (n->nlmsg_flags&NLM_F_EXCL)
+                               if (n->nlmsg_flags & NLM_F_EXCL)
                                        return -EEXIST;
                                if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                                        return -EINVAL;
@@ -1079,7 +1090,7 @@ replay:
                                atomic_inc(&q->refcnt);
                                goto graft;
                        } else {
-                               if (q == NULL)
+                               if (!q)
                                        goto create_n_graft;
 
                                /* This magic test requires explanation.
@@ -1101,9 +1112,9 @@ replay:
                                 *   For now we select create/graft, if
                                 *   user gave KIND, which does not match existing.
                                 */
-                               if ((n->nlmsg_flags&NLM_F_CREATE) &&
-                                   (n->nlmsg_flags&NLM_F_REPLACE) &&
-                                   ((n->nlmsg_flags&NLM_F_EXCL) ||
+                               if ((n->nlmsg_flags & NLM_F_CREATE) &&
+                                   (n->nlmsg_flags & NLM_F_REPLACE) &&
+                                   ((n->nlmsg_flags & NLM_F_EXCL) ||
                                     (tca[TCA_KIND] &&
                                      nla_strcmp(tca[TCA_KIND], q->ops->id))))
                                        goto create_n_graft;
@@ -1118,7 +1129,7 @@ replay:
        /* Change qdisc parameters */
        if (q == NULL)
                return -ENOENT;
-       if (n->nlmsg_flags&NLM_F_EXCL)
+       if (n->nlmsg_flags & NLM_F_EXCL)
                return -EEXIST;
        if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
                return -EINVAL;
@@ -1128,7 +1139,7 @@ replay:
        return err;
 
 create_n_graft:
-       if (!(n->nlmsg_flags&NLM_F_CREATE))
+       if (!(n->nlmsg_flags & NLM_F_CREATE))
                return -ENOENT;
        if (clid == TC_H_INGRESS) {
                if (dev_ingress_queue(dev))
@@ -1175,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
        struct nlmsghdr  *nlh;
        unsigned char *b = skb_tail_pointer(skb);
        struct gnet_dump d;
+       struct qdisc_size_table *stab;
 
        nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
        tcm = NLMSG_DATA(nlh);
@@ -1190,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                goto nla_put_failure;
        q->qstats.qlen = q->q.qlen;
 
-       if (q->stab && qdisc_dump_stab(skb, q->stab) < 0)
+       stab = rtnl_dereference(q->stab);
+       if (stab && qdisc_dump_stab(skb, stab) < 0)
                goto nla_put_failure;
 
        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1234,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
                return -ENOBUFS;
 
        if (old && !tc_qdisc_dump_ignore(old)) {
-               if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
+               if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
+                                 0, RTM_DELQDISC) < 0)
                        goto err_out;
        }
        if (new && !tc_qdisc_dump_ignore(new)) {
-               if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+               if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
+                                 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
                        goto err_out;
        }
 
        if (skb->len)
-               return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+               return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+                                     n->nlmsg_flags & NLM_F_ECHO);
 
 err_out:
        kfree_skb(skb);
@@ -1275,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
                        q_idx++;
                        continue;
                }
-               if (!tc_qdisc_dump_ignore(q) && 
+               if (!tc_qdisc_dump_ignore(q) &&
                    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
                                  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
                        goto done;
@@ -1356,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        u32 qid = TC_H_MAJ(clid);
        int err;
 
-       if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                return -ENODEV;
 
        err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1391,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
                        qid = dev->qdisc->handle;
 
                /* Now qid is genuine qdisc handle consistent
-                  both with parent and child.
-
-                  TC_H_MAJ(pid) still may be unspecified, complete it now.
+                * both with parent and child.
+                *
+                * TC_H_MAJ(pid) still may be unspecified, complete it now.
                 */
                if (pid)
                        pid = TC_H_MAKE(qid, pid);
@@ -1403,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
        }
 
        /* OK. Locate qdisc */
-       if ((q = qdisc_lookup(dev, qid)) == NULL)
+       q = qdisc_lookup(dev, qid);
+       if (!q)
                return -ENOENT;
 
        /* An check that it supports classes */
@@ -1423,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
        if (cl == 0) {
                err = -ENOENT;
-               if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
+               if (n->nlmsg_type != RTM_NEWTCLASS ||
+                   !(n->nlmsg_flags & NLM_F_CREATE))
                        goto out;
        } else {
                switch (n->nlmsg_type) {
                case RTM_NEWTCLASS:
                        err = -EEXIST;
-                       if (n->nlmsg_flags&NLM_F_EXCL)
+                       if (n->nlmsg_flags & NLM_F_EXCL)
                                goto out;
                        break;
                case RTM_DELTCLASS:
@@ -1521,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
                return -EINVAL;
        }
 
-       return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
+                             n->nlmsg_flags & NLM_F_ECHO);
 }
 
-struct qdisc_dump_args
-{
-       struct qdisc_walker w;
-       struct sk_buff *skb;
-       struct netlink_callback *cb;
+struct qdisc_dump_args {
+       struct qdisc_walker     w;
+       struct sk_buff          *skb;
+       struct netlink_callback *cb;
 };
 
 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1590,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
 
 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+       struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
        struct net *net = sock_net(skb->sk);
        struct netdev_queue *dev_queue;
        struct net_device *dev;
@@ -1598,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 
        if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
                return 0;
-       if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
+       dev = dev_get_by_index(net, tcm->tcm_ifindex);
+       if (!dev)
                return 0;
 
        s_t = cb->args[0];
@@ -1621,19 +1641,22 @@ done:
 }
 
 /* Main classifier routine: scans classifier chain attached
  to this qdisc, (optionally) tests for protocol and asks
  specific classifiers.
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
  */
 int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
                       struct tcf_result *res)
 {
        __be16 protocol = skb->protocol;
-       int err = 0;
+       int err;
 
        for (; tp; tp = tp->next) {
-               if ((tp->protocol == protocol ||
-                    tp->protocol == htons(ETH_P_ALL)) &&
-                   (err = tp->classify(skb, tp, res)) >= 0) {
+               if (tp->protocol != protocol &&
+                   tp->protocol != htons(ETH_P_ALL))
+                       continue;
+               err = tp->classify(skb, tp, res);
+
+               if (err >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
                        if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
                                skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1664,11 +1687,11 @@ reclassify:
 
                if (verd++ >= MAX_REC_LOOP) {
                        if (net_ratelimit())
-                               printk(KERN_NOTICE
-                                      "%s: packet reclassify loop"
+                               pr_notice("%s: packet reclassify loop"
                                          " rule prio %u protocol %02x\n",
-                                      tp->q->ops->id,
-                                      tp->prio & 0xffff, ntohs(tp->protocol));
+                                         tp->q->ops->id,
+                                         tp->prio & 0xffff,
+                                         ntohs(tp->protocol));
                        return TC_ACT_SHOT;
                }
                skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1761,7 +1784,7 @@ static int __init pktsched_init(void)
 
        err = register_pernet_subsys(&psched_net_ops);
        if (err) {
-               printk(KERN_ERR "pktsched_init: "
+               pr_err("pktsched_init: "
                       "cannot initialize per netns operations\n");
                return err;
        }
index 943d733..3f08158 100644 (file)
@@ -319,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
         * creation), and one for the reference held when calling delete.
         */
        if (flow->ref < 2) {
-               printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref);
+               pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
                return -EINVAL;
        }
        if (flow->ref > 2)
@@ -384,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
                        }
                }
                flow = NULL;
-       done:
-               ;               
+done:
+               ;
        }
-       if (!flow)
+       if (!flow) {
                flow = &p->link;
-       else {
+       else {
                if (flow->vcc)
                        ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
                /*@@@ looks good ... but it's not supposed to work :-) */
@@ -576,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
 
        list_for_each_entry_safe(flow, tmp, &p->flows, list) {
                if (flow->ref > 1)
-                       printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
-                              flow->ref);
+                       pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
                atm_tc_put(sch, (unsigned long)flow);
        }
        tasklet_kill(&p->task);
@@ -616,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
        }
        if (flow->excess)
                NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
-       else {
+       else
                NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
-       }
 
        nla_nest_end(skb, nest);
        return skb->len;
index 5f63ec5..24d94c0 100644 (file)
@@ -72,8 +72,7 @@
 struct cbq_sched_data;
 
 
-struct cbq_class
-{
+struct cbq_class {
        struct Qdisc_class_common common;
        struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
 
@@ -139,19 +138,18 @@ struct cbq_class
        int                     refcnt;
        int                     filters;
 
-       struct cbq_class        *defaults[TC_PRIO_MAX+1];
+       struct cbq_class        *defaults[TC_PRIO_MAX + 1];
 };
 
-struct cbq_sched_data
-{
+struct cbq_sched_data {
        struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
-       int                     nclasses[TC_CBQ_MAXPRIO+1];
-       unsigned                quanta[TC_CBQ_MAXPRIO+1];
+       int                     nclasses[TC_CBQ_MAXPRIO + 1];
+       unsigned int            quanta[TC_CBQ_MAXPRIO + 1];
 
        struct cbq_class        link;
 
-       unsigned                activemask;
-       struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
+       unsigned int            activemask;
+       struct cbq_class        *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
                                                                   with backlog */
 
 #ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
        int                     tx_len;
        psched_time_t           now;            /* Cached timestamp */
        psched_time_t           now_rt;         /* Cached real time */
-       unsigned                pmask;
+       unsigned int            pmask;
 
        struct hrtimer          delay_timer;
        struct qdisc_watchdog   watchdog;       /* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
 };
 
 
-#define L2T(cl,len)    qdisc_l2t((cl)->R_tab,len)
+#define L2T(cl, len)   qdisc_l2t((cl)->R_tab, len)
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 {
        struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 static struct cbq_class *
 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 {
-       struct cbq_class *cl, *new;
+       struct cbq_class *cl;
 
-       for (cl = this->tparent; cl; cl = cl->tparent)
-               if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
-                       return new;
+       for (cl = this->tparent; cl; cl = cl->tparent) {
+               struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 
+               if (new != NULL && new != this)
+                       return new;
+       }
        return NULL;
 }
 
 #endif
 
 /* Classify packet. The procedure is pretty complicated, but
  it allows us to combine link sharing and priority scheduling
  transparently.
-
  Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
  so that it resolves to split nodes. Then packets are classified
  by logical priority, or a more specific classifier may be attached
  to the split node.
* it allows us to combine link sharing and priority scheduling
* transparently.
+ *
* Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
* so that it resolves to split nodes. Then packets are classified
* by logical priority, or a more specific classifier may be attached
* to the split node.
  */
 
 static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
        /*
         *  Step 1. If skb->priority points to one of our classes, use it.
         */
-       if (TC_H_MAJ(prio^sch->handle) == 0 &&
+       if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
            (cl = cbq_class_lookup(q, prio)) != NULL)
                return cl;
 
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
                        goto fallback;
 
-               if ((cl = (void*)res.class) == NULL) {
+               cl = (void *)res.class;
+               if (!cl) {
                        if (TC_H_MAJ(res.classid))
                                cl = cbq_class_lookup(q, res.classid);
-                       else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
+                       else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
                                cl = defmap[TC_PRIO_BESTEFFORT];
 
                        if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
         * Step 4. No success...
         */
        if (TC_H_MAJ(prio) == 0 &&
-           !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
+           !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
            !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
                return head;
 
@@ -290,12 +291,12 @@ fallback:
 }
 
 /*
  A packet has just been enqueued on the empty class.
  cbq_activate_class adds it to the tail of active class list
  of its priority band.
* A packet has just been enqueued on the empty class.
* cbq_activate_class adds it to the tail of active class list
* of its priority band.
  */
 
-static __inline__ void cbq_activate_class(struct cbq_class *cl)
+static inline void cbq_activate_class(struct cbq_class *cl)
 {
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
 }
 
 /*
  Unlink class from active chain.
  Note that this same procedure is done directly in cbq_dequeue*
  during round-robin procedure.
* Unlink class from active chain.
* Note that this same procedure is done directly in cbq_dequeue*
* during round-robin procedure.
  */
 
 static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 {
        int toplevel = q->toplevel;
 
-       if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
+       if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
                psched_time_t now;
                psched_tdiff_t incr;
 
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
                                q->toplevel = cl->level;
                                return;
                        }
-               } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
+               } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
        }
 }
 
@@ -417,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
                delay += cl->offtime;
 
                /*
-                  Class goes to sleep, so that it will have no
-                  chance to work avgidle. Let's forgive it 8)
-
-                  BTW cbq-2.0 has a crap in this
-                  place, apparently they forgot to shift it by cl->ewma_log.
+                * Class goes to sleep, so that it will have no
+                * chance to work avgidle. Let's forgive it 8)
+                *
+                * BTW cbq-2.0 has a crap in this
+                * place, apparently they forgot to shift it by cl->ewma_log.
                 */
                if (cl->avgidle < 0)
                        delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -438,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
                q->wd_expires = delay;
 
        /* Dirty work! We must schedule wakeups based on
-          real available rate, rather than leaf rate,
-          which may be tiny (even zero).
+        * real available rate, rather than leaf rate,
+        * which may be tiny (even zero).
         */
        if (q->toplevel == TC_CBQ_MAXLEVEL) {
                struct cbq_class *b;
@@ -459,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
 }
 
 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
  they go overlimit
* they go overlimit
  */
 
 static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -594,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
        struct Qdisc *sch = q->watchdog.qdisc;
        psched_time_t now;
        psched_tdiff_t delay = 0;
-       unsigned pmask;
+       unsigned int pmask;
 
        now = psched_get_time();
 
@@ -623,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
                hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
        }
 
-       sch->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(sch);
        __netif_schedule(qdisc_root(sch));
        return HRTIMER_NORESTART;
 }
@@ -663,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 #endif
 
 /*
  It is mission critical procedure.
-
  We "regenerate" toplevel cutoff, if transmitting class
  has backlog and it is not regulated. It is not part of
  original CBQ description, but looks more reasonable.
  Probably, it is wrong. This question needs further investigation.
-*/
* It is mission critical procedure.
+ *
* We "regenerate" toplevel cutoff, if transmitting class
* has backlog and it is not regulated. It is not part of
* original CBQ description, but looks more reasonable.
* Probably, it is wrong. This question needs further investigation.
+ */
 
-static __inline__ void
+static inline void
 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                    struct cbq_class *borrowed)
 {
@@ -682,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
                                        q->toplevel = borrowed->level;
                                        return;
                                }
-                       } while ((borrowed=borrowed->borrow) != NULL);
+                       } while ((borrowed = borrowed->borrow) != NULL);
                }
 #if 0
        /* It is not necessary now. Uncommenting it
@@ -710,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
                cl->bstats.bytes += len;
 
                /*
-                  (now - last) is total time between packet right edges.
-                  (last_pktlen/rate) is "virtual" busy time, so that
-
-                        idle = (now - last) - last_pktlen/rate
+                * (now - last) is total time between packet right edges.
+                * (last_pktlen/rate) is "virtual" busy time, so that
+                *
+                *      idle = (now - last) - last_pktlen/rate
                 */
 
                idle = q->now - cl->last;
@@ -723,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
                        idle -= L2T(cl, len);
 
                /* true_avgidle := (1-W)*true_avgidle + W*idle,
-                  where W=2^{-ewma_log}. But cl->avgidle is scaled:
-                  cl->avgidle == true_avgidle/W,
-                  hence:
+                * where W=2^{-ewma_log}. But cl->avgidle is scaled:
+                * cl->avgidle == true_avgidle/W,
+                * hence:
                 */
                        avgidle += idle - (avgidle>>cl->ewma_log);
                }
@@ -739,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
                        cl->avgidle = avgidle;
 
                        /* Calculate expected time, when this class
-                          will be allowed to send.
-                          It will occur, when:
-                          (1-W)*true_avgidle + W*delay = 0, i.e.
-                          idle = (1/W - 1)*(-true_avgidle)
-                          or
-                          idle = (1 - W)*(-cl->avgidle);
+                        * will be allowed to send.
+                        * It will occur, when:
+                        * (1-W)*true_avgidle + W*delay = 0, i.e.
+                        * idle = (1/W - 1)*(-true_avgidle)
+                        * or
+                        * idle = (1 - W)*(-cl->avgidle);
                         */
                        idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 
                        /*
-                          That is not all.
-                          To maintain the rate allocated to the class,
-                          we add to undertime virtual clock,
-                          necessary to complete transmitted packet.
-                          (len/phys_bandwidth has been already passed
-                          to the moment of cbq_update)
+                        * That is not all.
+                        * To maintain the rate allocated to the class,
+                        * we add to undertime virtual clock,
+                        * necessary to complete transmitted packet.
+                        * (len/phys_bandwidth has been already passed
+                        * to the moment of cbq_update)
                         */
 
                        idle -= L2T(&q->link, len);
@@ -776,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
        cbq_update_toplevel(q, this, q->tx_borrowed);
 }
 
-static __inline__ struct cbq_class *
+static inline struct cbq_class *
 cbq_under_limit(struct cbq_class *cl)
 {
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -792,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
 
        do {
                /* It is very suspicious place. Now overlimit
-                  action is generated for not bounded classes
-                  only if link is completely congested.
-                  Though it is in agree with ancestor-only paradigm,
-                  it looks very stupid. Particularly,
-                  it means that this chunk of code will either
-                  never be called or result in strong amplification
-                  of burstiness. Dangerous, silly, and, however,
-                  no another solution exists.
+                * action is generated for not bounded classes
+                * only if link is completely congested.
+                * Though it is in agree with ancestor-only paradigm,
+                * it looks very stupid. Particularly,
+                * it means that this chunk of code will either
+                * never be called or result in strong amplification
+                * of burstiness. Dangerous, silly, and, however,
+                * no another solution exists.
                 */
-               if ((cl = cl->borrow) == NULL) {
+               cl = cl->borrow;
+               if (!cl) {
                        this_cl->qstats.overlimits++;
                        this_cl->overlimit(this_cl);
                        return NULL;
@@ -814,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
        return cl;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
@@ -838,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 
                        if (cl->deficit <= 0) {
                                /* Class exhausted its allotment per
-                                  this round. Switch to the next one.
+                                * this round. Switch to the next one.
                                 */
                                deficit = 1;
                                cl->deficit += cl->quantum;
@@ -848,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
                        skb = cl->q->dequeue(cl->q);
 
                        /* Class did not give us any skb :-(
-                          It could occur even if cl->q->q.qlen != 0
-                          f.e. if cl->q == "tbf"
+                        * It could occur even if cl->q->q.qlen != 0
+                        * f.e. if cl->q == "tbf"
                         */
                        if (skb == NULL)
                                goto skip_class;
@@ -878,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
 skip_class:
                        if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
                                /* Class is empty or penalized.
-                                  Unlink it from active chain.
+                                * Unlink it from active chain.
                                 */
                                cl_prev->next_alive = cl->next_alive;
                                cl->next_alive = NULL;
@@ -917,14 +919,14 @@ next_class:
        return NULL;
 }
 
-static __inline__ struct sk_buff *
+static inline struct sk_buff *
 cbq_dequeue_1(struct Qdisc *sch)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
-       unsigned activemask;
+       unsigned int activemask;
 
-       activemask = q->activemask&0xFF;
+       activemask = q->activemask & 0xFF;
        while (activemask) {
                int prio = ffz(~activemask);
                activemask &= ~(1<<prio);
@@ -949,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
        if (q->tx_class) {
                psched_tdiff_t incr2;
                /* Time integrator. We calculate EOS time
-                  by adding expected packet transmission time.
-                  If real time is greater, we warp artificial clock,
-                  so that:
-
-                  cbq_time = max(real_time, work);
+                * by adding expected packet transmission time.
+                * If real time is greater, we warp artificial clock,
+                * so that:
+                *
+                * cbq_time = max(real_time, work);
                 */
                incr2 = L2T(&q->link, q->tx_len);
                q->now += incr2;
@@ -971,27 +973,27 @@ cbq_dequeue(struct Qdisc *sch)
                if (skb) {
                        qdisc_bstats_update(sch, skb);
                        sch->q.qlen--;
-                       sch->flags &= ~TCQ_F_THROTTLED;
+                       qdisc_unthrottled(sch);
                        return skb;
                }
 
                /* All the classes are overlimit.
-
-                  It is possible, if:
-
-                  1. Scheduler is empty.
-                  2. Toplevel cutoff inhibited borrowing.
-                  3. Root class is overlimit.
-
-                  Reset 2d and 3d conditions and retry.
-
-                  Note, that NS and cbq-2.0 are buggy, peeking
-                  an arbitrary class is appropriate for ancestor-only
-                  sharing, but not for toplevel algorithm.
-
-                  Our version is better, but slower, because it requires
-                  two passes, but it is unavoidable with top-level sharing.
-               */
+                *
+                * It is possible, if:
+                *
+                * 1. Scheduler is empty.
+                * 2. Toplevel cutoff inhibited borrowing.
+                * 3. Root class is overlimit.
+                *
+                * Reset 2d and 3d conditions and retry.
+                *
+                * Note, that NS and cbq-2.0 are buggy, peeking
+                * an arbitrary class is appropriate for ancestor-only
+                * sharing, but not for toplevel algorithm.
+                *
+                * Our version is better, but slower, because it requires
+                * two passes, but it is unavoidable with top-level sharing.
+                */
 
                if (q->toplevel == TC_CBQ_MAXLEVEL &&
                    q->link.undertime == PSCHED_PASTPERFECT)
@@ -1002,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
        }
 
        /* No packets in scheduler or nobody wants to give them to us :-(
-          Sigh... start watchdog timer in the last case. */
+        * Sigh... start watchdog timer in the last case.
+        */
 
        if (sch->q.qlen) {
                sch->qstats.overlimits++;
@@ -1024,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
                int level = 0;
                struct cbq_class *cl;
 
-               if ((cl = this->children) != NULL) {
+               cl = this->children;
+               if (cl) {
                        do {
                                if (cl->level > level)
                                        level = cl->level;
                        } while ((cl = cl->sibling) != this->children);
                }
-               this->level = level+1;
+               this->level = level + 1;
        } while ((this = this->tparent) != NULL);
 }
 
@@ -1046,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
        for (h = 0; h < q->clhash.hashsize; h++) {
                hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
                        /* BUGGGG... Beware! This expression suffer of
-                          arithmetic overflows!
+                        * arithmetic overflows!
                         */
                        if (cl->priority == prio) {
                                cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
                                        q->quanta[prio];
                        }
                        if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
-                               printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
+                               pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
+                                          cl->common.classid, cl->quantum);
                                cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
                        }
                }
@@ -1064,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
 {
        struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
        struct cbq_class *split = cl->split;
-       unsigned h;
+       unsigned int h;
        int i;
 
        if (split == NULL)
                return;
 
-       for (i=0; i<=TC_PRIO_MAX; i++) {
-               if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
+       for (i = 0; i <= TC_PRIO_MAX; i++) {
+               if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
                        split->defaults[i] = NULL;
        }
 
-       for (i=0; i<=TC_PRIO_MAX; i++) {
+       for (i = 0; i <= TC_PRIO_MAX; i++) {
                int level = split->level;
 
                if (split->defaults[i])
@@ -1088,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
                        hlist_for_each_entry(c, n, &q->clhash.hash[h],
                                             common.hnode) {
                                if (c->split == split && c->level < level &&
-                                   c->defmap&(1<<i)) {
+                                   c->defmap & (1<<i)) {
                                        split->defaults[i] = c;
                                        level = c->level;
                                }
@@ -1102,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
        struct cbq_class *split = NULL;
 
        if (splitid == 0) {
-               if ((split = cl->split) == NULL)
+               split = cl->split;
+               if (!split)
                        return;
                splitid = split->common.classid;
        }
@@ -1120,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
                cl->defmap = 0;
                cbq_sync_defmap(cl);
                cl->split = split;
-               cl->defmap = def&mask;
+               cl->defmap = def & mask;
        } else
-               cl->defmap = (cl->defmap&~mask)|(def&mask);
+               cl->defmap = (cl->defmap & ~mask) | (def & mask);
 
        cbq_sync_defmap(cl);
 }
@@ -1135,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
        qdisc_class_hash_remove(&q->clhash, &this->common);
 
        if (this->tparent) {
-               clp=&this->sibling;
+               clp = &this->sibling;
                cl = *clp;
                do {
                        if (cl == this) {
@@ -1174,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
        }
 }
 
-static unsigned int cbq_drop(struct Qdiscsch)
+static unsigned int cbq_drop(struct Qdisc *sch)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl, *cl_head;
@@ -1182,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
        unsigned int len;
 
        for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
-               if ((cl_head = q->active[prio]) == NULL)
+               cl_head = q->active[prio];
+               if (!cl_head)
                        continue;
 
                cl = cl_head;
@@ -1199,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
 }
 
 static void
-cbq_reset(struct Qdiscsch)
+cbq_reset(struct Qdisc *sch)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        struct hlist_node *n;
        int prio;
-       unsigned h;
+       unsigned int h;
 
        q->activemask = 0;
        q->pmask = 0;
@@ -1237,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
 
 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
 {
-       if (lss->change&TCF_CBQ_LSS_FLAGS) {
-               cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
-               cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
+       if (lss->change & TCF_CBQ_LSS_FLAGS) {
+               cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
+               cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
        }
-       if (lss->change&TCF_CBQ_LSS_EWMA)
+       if (lss->change & TCF_CBQ_LSS_EWMA)
                cl->ewma_log = lss->ewma_log;
-       if (lss->change&TCF_CBQ_LSS_AVPKT)
+       if (lss->change & TCF_CBQ_LSS_AVPKT)
                cl->avpkt = lss->avpkt;
-       if (lss->change&TCF_CBQ_LSS_MINIDLE)
+       if (lss->change & TCF_CBQ_LSS_MINIDLE)
                cl->minidle = -(long)lss->minidle;
-       if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
+       if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
                cl->maxidle = lss->maxidle;
                cl->avgidle = lss->maxidle;
        }
-       if (lss->change&TCF_CBQ_LSS_OFFTIME)
+       if (lss->change & TCF_CBQ_LSS_OFFTIME)
                cl->offtime = lss->offtime;
        return 0;
 }
@@ -1279,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
        if (wrr->weight)
                cl->weight = wrr->weight;
        if (wrr->priority) {
-               cl->priority = wrr->priority-1;
+               cl->priority = wrr->priority - 1;
                cl->cpriority = cl->priority;
                if (cl->priority >= cl->priority2)
-                       cl->priority2 = TC_CBQ_MAXPRIO-1;
+                       cl->priority2 = TC_CBQ_MAXPRIO - 1;
        }
 
        cbq_addprio(q, cl);
@@ -1299,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
                cl->overlimit = cbq_ovl_delay;
                break;
        case TC_CBQ_OVL_LOWPRIO:
-               if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
-                   ovl->priority2-1 <= cl->priority)
+               if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
+                   ovl->priority2 - 1 <= cl->priority)
                        return -EINVAL;
-               cl->priority2 = ovl->priority2-1;
+               cl->priority2 = ovl->priority2 - 1;
                cl->overlimit = cbq_ovl_lowprio;
                break;
        case TC_CBQ_OVL_DROP:
@@ -1381,9 +1388,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
        if (!q->link.q)
                q->link.q = &noop_qdisc;
 
-       q->link.priority = TC_CBQ_MAXPRIO-1;
-       q->link.priority2 = TC_CBQ_MAXPRIO-1;
-       q->link.cpriority = TC_CBQ_MAXPRIO-1;
+       q->link.priority = TC_CBQ_MAXPRIO - 1;
+       q->link.priority2 = TC_CBQ_MAXPRIO - 1;
+       q->link.cpriority = TC_CBQ_MAXPRIO - 1;
        q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
        q->link.overlimit = cbq_ovl_classic;
        q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1414,7 +1421,7 @@ put_rtab:
        return err;
 }
 
-static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
 {
        unsigned char *b = skb_tail_pointer(skb);
 
@@ -1426,7 +1433,7 @@ nla_put_failure:
        return -1;
 }
 
-static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_lssopt opt;
@@ -1451,15 +1458,15 @@ nla_put_failure:
        return -1;
 }
 
-static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_wrropt opt;
 
        opt.flags = 0;
        opt.allot = cl->allot;
-       opt.priority = cl->priority+1;
-       opt.cpriority = cl->cpriority+1;
+       opt.priority = cl->priority + 1;
+       opt.cpriority = cl->cpriority + 1;
        opt.weight = cl->weight;
        NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
        return skb->len;
@@ -1469,13 +1476,13 @@ nla_put_failure:
        return -1;
 }
 
-static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_ovl opt;
 
        opt.strategy = cl->ovl_strategy;
-       opt.priority2 = cl->priority2+1;
+       opt.priority2 = cl->priority2 + 1;
        opt.pad = 0;
        opt.penalty = cl->penalty;
        NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1486,7 +1493,7 @@ nla_put_failure:
        return -1;
 }
 
-static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_fopt opt;
@@ -1505,7 +1512,7 @@ nla_put_failure:
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
+static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
 {
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_cbq_police opt;
@@ -1569,7 +1576,7 @@ static int
 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
               struct sk_buff *skb, struct tcmsg *tcm)
 {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
        struct nlattr *nest;
 
        if (cl->tparent)
@@ -1597,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
        struct gnet_dump *d)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
 
        cl->qstats.qlen = cl->q->q.qlen;
        cl->xstats.avgidle = cl->avgidle;
@@ -1617,7 +1624,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old)
 {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
 
        if (new == NULL) {
                new = qdisc_create_dflt(sch->dev_queue,
@@ -1640,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
        return 0;
 }
 
-static struct Qdisc *
-cbq_leaf(struct Qdisc *sch, unsigned long arg)
+static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
 {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
 
        return cl->q;
 }
@@ -1682,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
                kfree(cl);
 }
 
-static void
-cbq_destroy(struct Qdisc* sch)
+static void cbq_destroy(struct Qdisc *sch)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct hlist_node *n, *next;
        struct cbq_class *cl;
-       unsigned h;
+       unsigned int h;
 
 #ifdef CONFIG_NET_CLS_ACT
        q->rx_class = NULL;
@@ -1712,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
 
 static void cbq_put(struct Qdisc *sch, unsigned long arg)
 {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
 
        if (--cl->refcnt == 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1735,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 {
        int err;
        struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *cl = (struct cbq_class*)*arg;
+       struct cbq_class *cl = (struct cbq_class *)*arg;
        struct nlattr *opt = tca[TCA_OPTIONS];
        struct nlattr *tb[TCA_CBQ_MAX + 1];
        struct cbq_class *parent;
@@ -1827,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
        if (classid) {
                err = -EINVAL;
-               if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
+               if (TC_H_MAJ(classid ^ sch->handle) ||
+                   cbq_class_lookup(q, classid))
                        goto failure;
        } else {
                int i;
-               classid = TC_H_MAKE(sch->handle,0x8000);
+               classid = TC_H_MAKE(sch->handle, 0x8000);
 
-               for (i=0; i<0x8000; i++) {
+               for (i = 0; i < 0x8000; i++) {
                        if (++q->hgenerator >= 0x8000)
                                q->hgenerator = 1;
                        if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1890,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
        cl->minidle = -0x7FFFFFFF;
        cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
        cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
-       if (cl->ewma_log==0)
+       if (cl->ewma_log == 0)
                cl->ewma_log = q->link.ewma_log;
-       if (cl->maxidle==0)
+       if (cl->maxidle == 0)
                cl->maxidle = q->link.maxidle;
-       if (cl->avpkt==0)
+       if (cl->avpkt == 0)
                cl->avpkt = q->link.avpkt;
        cl->overlimit = cbq_ovl_classic;
        if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1920,7 +1926,7 @@ failure:
 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
        unsigned int qlen;
 
        if (cl->filters || cl->children || cl == &q->link)
@@ -1978,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
                                     u32 classid)
 {
        struct cbq_sched_data *q = qdisc_priv(sch);
-       struct cbq_class *p = (struct cbq_class*)parent;
+       struct cbq_class *p = (struct cbq_class *)parent;
        struct cbq_class *cl = cbq_class_lookup(q, classid);
 
        if (cl) {
@@ -1992,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
 
 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
 {
-       struct cbq_class *cl = (struct cbq_class*)arg;
+       struct cbq_class *cl = (struct cbq_class *)arg;
 
        cl->filters--;
 }
@@ -2002,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
        struct cbq_sched_data *q = qdisc_priv(sch);
        struct cbq_class *cl;
        struct hlist_node *n;
-       unsigned h;
+       unsigned int h;
 
        if (arg->stop)
                return;
index 0f7bf3f..2c79020 100644 (file)
@@ -137,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
                mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
 
        if (tb[TCA_DSMARK_VALUE])
-               p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+               p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
 
        if (tb[TCA_DSMARK_MASK])
-               p->mask[*arg-1] = mask;
+               p->mask[*arg - 1] = mask;
 
        err = 0;
 
@@ -155,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
        if (!dsmark_valid_index(p, arg))
                return -EINVAL;
 
-       p->mask[arg-1] = 0xff;
-       p->value[arg-1] = 0;
+       p->mask[arg - 1] = 0xff;
+       p->value[arg - 1] = 0;
 
        return 0;
 }
@@ -175,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
                if (p->mask[i] == 0xff && !p->value[i])
                        goto ignore;
                if (walker->count >= walker->skip) {
-                       if (walker->fn(sch, i+1, walker) < 0) {
+                       if (walker->fn(sch, i + 1, walker) < 0) {
                                walker->stop = 1;
                                break;
                        }
@@ -304,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
                 * and don't need yet another qdisc as a bypass.
                 */
                if (p->mask[index] != 0xff || p->value[index])
-                       printk(KERN_WARNING
-                              "dsmark_dequeue: unsupported protocol %d\n",
-                              ntohs(skb->protocol));
+                       pr_warning("dsmark_dequeue: unsupported protocol %d\n",
+                                  ntohs(skb->protocol));
                break;
        }
 
@@ -424,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
        if (!dsmark_valid_index(p, cl))
                return -EINVAL;
 
-       tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1);
+       tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
        tcm->tcm_info = p->q->handle;
 
        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
-       NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]);
-       NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]);
+       NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
+       NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
 
        return nla_nest_end(skb, opts);
 
index d468b47..be33f9d 100644 (file)
 
 /* 1 band FIFO pseudo-"scheduler" */
 
-struct fifo_sched_data
-{
+struct fifo_sched_data {
        u32 limit;
 };
 
-static int bfifo_enqueue(struct sk_buff *skb, struct Qdiscsch)
+static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct fifo_sched_data *q = qdisc_priv(sch);
 
@@ -34,7 +33,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
        return qdisc_reshape_fail(skb, sch);
 }
 
-static int pfifo_enqueue(struct sk_buff *skb, struct Qdiscsch)
+static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct fifo_sched_data *q = qdisc_priv(sch);
 
@@ -44,7 +43,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
        return qdisc_reshape_fail(skb, sch);
 }
 
-static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdiscsch)
+static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct fifo_sched_data *q = qdisc_priv(sch);
 
@@ -62,11 +61,13 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct fifo_sched_data *q = qdisc_priv(sch);
+       bool bypass;
+       bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
 
        if (opt == NULL) {
                u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
 
-               if (sch->ops == &bfifo_qdisc_ops)
+               if (is_bfifo)
                        limit *= psched_mtu(qdisc_dev(sch));
 
                q->limit = limit;
@@ -79,6 +80,15 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
                q->limit = ctl->limit;
        }
 
+       if (is_bfifo)
+               bypass = q->limit >= psched_mtu(qdisc_dev(sch));
+       else
+               bypass = q->limit >= 1;
+
+       if (bypass)
+               sch->flags |= TCQ_F_CAN_BYPASS;
+       else
+               sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
 }
 
index 34dc598..0da09d5 100644 (file)
@@ -87,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
                 */
                kfree_skb(skb);
                if (net_ratelimit())
-                       printk(KERN_WARNING "Dead loop on netdevice %s, "
-                              "fix it urgently!\n", dev_queue->dev->name);
+                       pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
+                                  dev_queue->dev->name);
                ret = qdisc_qlen(q);
        } else {
                /*
@@ -137,8 +137,8 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
        } else {
                /* Driver returned NETDEV_TX_BUSY - requeue skb */
                if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
-                       printk(KERN_WARNING "BUG %s code %d qlen %d\n",
-                              dev->name, ret, q->q.qlen);
+                       pr_warning("BUG %s code %d qlen %d\n",
+                                  dev->name, ret, q->q.qlen);
 
                ret = dev_requeue_skb(skb, q);
        }
@@ -412,8 +412,9 @@ static struct Qdisc noqueue_qdisc = {
 };
 
 
-static const u8 prio2band[TC_PRIO_MAX+1] =
-       { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };
+static const u8 prio2band[TC_PRIO_MAX + 1] = {
+       1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+};
 
 /* 3-band FIFO queue: old style, but should be a bit faster than
    generic prio+fifo combination.
@@ -445,7 +446,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
        return priv->q + band;
 }
 
-static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdiscqdisc)
+static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
 {
        if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
                int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +461,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
        return qdisc_drop(skb, qdisc);
 }
 
-static struct sk_buff *pfifo_fast_dequeue(struct Qdiscqdisc)
+static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 {
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
        int band = bitmap2band[priv->bitmap];
@@ -479,7 +480,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
        return NULL;
 }
 
-static struct sk_buff *pfifo_fast_peek(struct Qdiscqdisc)
+static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
 {
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
        int band = bitmap2band[priv->bitmap];
@@ -493,7 +494,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
        return NULL;
 }
 
-static void pfifo_fast_reset(struct Qdiscqdisc)
+static void pfifo_fast_reset(struct Qdisc *qdisc)
 {
        int prio;
        struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +511,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
 {
        struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
 
-       memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
+       memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
        NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
        return skb->len;
 
@@ -526,6 +527,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
        for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
                skb_queue_head_init(band2list(priv, prio));
 
+       /* Can by-pass the queue discipline */
+       qdisc->flags |= TCQ_F_CAN_BYPASS;
        return 0;
 }
 
@@ -540,6 +543,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
        .dump           =       pfifo_fast_dump,
        .owner          =       THIS_MODULE,
 };
+EXPORT_SYMBOL(pfifo_fast_ops);
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                          struct Qdisc_ops *ops)
@@ -630,7 +634,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
 #ifdef CONFIG_NET_SCHED
        qdisc_list_del(qdisc);
 
-       qdisc_put_stab(qdisc->stab);
+       qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
        gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
        if (ops->reset)
@@ -674,25 +678,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 
        return oqdisc;
 }
+EXPORT_SYMBOL(dev_graft_qdisc);
 
 static void attach_one_default_qdisc(struct net_device *dev,
                                     struct netdev_queue *dev_queue,
                                     void *_unused)
 {
-       struct Qdisc *qdisc;
+       struct Qdisc *qdisc = &noqueue_qdisc;
 
        if (dev->tx_queue_len) {
                qdisc = qdisc_create_dflt(dev_queue,
                                          &pfifo_fast_ops, TC_H_ROOT);
                if (!qdisc) {
-                       printk(KERN_INFO "%s: activation failed\n", dev->name);
+                       netdev_info(dev, "activation failed\n");
                        return;
                }
-
-               /* Can by-pass the queue discipline for default qdisc */
-               qdisc->flags |= TCQ_F_CAN_BYPASS;
-       } else {
-               qdisc =  &noqueue_qdisc;
        }
        dev_queue->qdisc_sleeping = qdisc;
 }
@@ -761,6 +761,7 @@ void dev_activate(struct net_device *dev)
                dev_watchdog_up(dev);
        }
 }
+EXPORT_SYMBOL(dev_activate);
 
 static void dev_deactivate_queue(struct net_device *dev,
                                 struct netdev_queue *dev_queue,
@@ -840,6 +841,7 @@ void dev_deactivate(struct net_device *dev)
        list_add(&dev->unreg_list, &single);
        dev_deactivate_many(&single);
 }
+EXPORT_SYMBOL(dev_deactivate);
 
 static void dev_init_scheduler_queue(struct net_device *dev,
                                     struct netdev_queue *dev_queue,
index 51dcc2a..b9493a0 100644 (file)
@@ -32,8 +32,7 @@
 struct gred_sched_data;
 struct gred_sched;
 
-struct gred_sched_data
-{
+struct gred_sched_data {
        u32             limit;          /* HARD maximal queue length    */
        u32             DP;             /* the drop pramaters */
        u32             bytesin;        /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
        GRED_RIO_MODE,
 };
 
-struct gred_sched
-{
+struct gred_sched {
        struct gred_sched_data *tab[MAX_DPs];
        unsigned long   flags;
        u32             red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
        return t->red_flags & TC_RED_HARDDROP;
 }
 
-static int gred_enqueue(struct sk_buff *skb, struct Qdiscsch)
+static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
-       struct gred_sched_data *q=NULL;
-       struct gred_sched *t= qdisc_priv(sch);
+       struct gred_sched_data *q = NULL;
+       struct gred_sched *t = qdisc_priv(sch);
        unsigned long qavg = 0;
        u16 dp = tc_index_to_dp(skb);
 
-       if (dp >= t->DPs  || (q = t->tab[dp]) == NULL) {
+       if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                dp = t->def;
 
-               if ((q = t->tab[dp]) == NULL) {
+               q = t->tab[dp];
+               if (!q) {
                        /* Pass through packets not assigned to a DP
                         * if no default DP has been configured. This
                         * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
                for (i = 0; i < t->DPs; i++) {
                        if (t->tab[i] && t->tab[i]->prio < q->prio &&
                            !red_is_idling(&t->tab[i]->parms))
-                               qavg +=t->tab[i]->parms.qavg;
+                               qavg += t->tab[i]->parms.qavg;
                }
 
        }
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
                gred_store_wred_set(t, q);
 
        switch (red_action(&q->parms, q->parms.qavg + qavg)) {
-               case RED_DONT_MARK:
-                       break;
-
-               case RED_PROB_MARK:
-                       sch->qstats.overlimits++;
-                       if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
-                               q->stats.prob_drop++;
-                               goto congestion_drop;
-                       }
-
-                       q->stats.prob_mark++;
-                       break;
-
-               case RED_HARD_MARK:
-                       sch->qstats.overlimits++;
-                       if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
-                           !INET_ECN_set_ce(skb)) {
-                               q->stats.forced_drop++;
-                               goto congestion_drop;
-                       }
-                       q->stats.forced_mark++;
-                       break;
+       case RED_DONT_MARK:
+               break;
+
+       case RED_PROB_MARK:
+               sch->qstats.overlimits++;
+               if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
+                       q->stats.prob_drop++;
+                       goto congestion_drop;
+               }
+
+               q->stats.prob_mark++;
+               break;
+
+       case RED_HARD_MARK:
+               sch->qstats.overlimits++;
+               if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
+                   !INET_ECN_set_ce(skb)) {
+                       q->stats.forced_drop++;
+                       goto congestion_drop;
+               }
+               q->stats.forced_mark++;
+               break;
        }
 
        if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
        return NET_XMIT_CN;
 }
 
-static struct sk_buff *gred_dequeue(struct Qdiscsch)
+static struct sk_buff *gred_dequeue(struct Qdisc *sch)
 {
        struct sk_buff *skb;
        struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
 
                if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                        if (net_ratelimit())
-                               printk(KERN_WARNING "GRED: Unable to relocate "
-                                      "VQ 0x%x after dequeue, screwing up "
-                                      "backlog.\n", tc_index_to_dp(skb));
+                               pr_warning("GRED: Unable to relocate VQ 0x%x "
+                                          "after dequeue, screwing up "
+                                          "backlog.\n", tc_index_to_dp(skb));
                } else {
                        q->backlog -= qdisc_pkt_len(skb);
 
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
        return NULL;
 }
 
-static unsigned int gred_drop(struct Qdiscsch)
+static unsigned int gred_drop(struct Qdisc *sch)
 {
        struct sk_buff *skb;
        struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
                if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
                        if (net_ratelimit())
-                               printk(KERN_WARNING "GRED: Unable to relocate "
-                                      "VQ 0x%x while dropping, screwing up "
-                                      "backlog.\n", tc_index_to_dp(skb));
+                               pr_warning("GRED: Unable to relocate VQ 0x%x "
+                                          "while dropping, screwing up "
+                                          "backlog.\n", tc_index_to_dp(skb));
                } else {
                        q->backlog -= len;
                        q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
 
 }
 
-static void gred_reset(struct Qdiscsch)
+static void gred_reset(struct Qdisc *sch)
 {
        int i;
        struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 
        for (i = table->DPs; i < MAX_DPs; i++) {
                if (table->tab[i]) {
-                       printk(KERN_WARNING "GRED: Warning: Destroying "
-                              "shadowed VQ 0x%x\n", i);
+                       pr_warning("GRED: Warning: Destroying "
+                                  "shadowed VQ 0x%x\n", i);
                        gred_destroy_vq(table->tab[i]);
                        table->tab[i] = NULL;
                }
index 14a799d..6488e64 100644 (file)
@@ -81,8 +81,7 @@
  *   that are expensive on 32-bit architectures.
  */
 
-struct internal_sc
-{
+struct internal_sc {
        u64     sm1;    /* scaled slope of the 1st segment */
        u64     ism1;   /* scaled inverse-slope of the 1st segment */
        u64     dx;     /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
 };
 
 /* runtime service curve */
-struct runtime_sc
-{
+struct runtime_sc {
        u64     x;      /* current starting position on x-axis */
        u64     y;      /* current starting position on y-axis */
        u64     sm1;    /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
        u64     ism2;   /* scaled inverse-slope of the 2nd segment */
 };
 
-enum hfsc_class_flags
-{
+enum hfsc_class_flags {
        HFSC_RSC = 0x1,
        HFSC_FSC = 0x2,
        HFSC_USC = 0x4
 };
 
-struct hfsc_class
-{
+struct hfsc_class {
        struct Qdisc_class_common cl_common;
        unsigned int    refcnt;         /* usage count */
 
@@ -140,8 +136,8 @@ struct hfsc_class
        u64     cl_cumul;               /* cumulative work in bytes done by
                                           real-time criteria */
 
-       u64     cl_d;                   /* deadline*/
-       u64     cl_e;                   /* eligible time */
+       u64     cl_d;                   /* deadline*/
+       u64     cl_e;                   /* eligible time */
        u64     cl_vt;                  /* virtual time */
        u64     cl_f;                   /* time when this class will fit for
                                           link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
        unsigned long   cl_nactive;     /* number of active children */
 };
 
-struct hfsc_sched
-{
+struct hfsc_sched {
        u16     defcls;                         /* default class id */
        struct hfsc_class root;                 /* root class */
        struct Qdisc_class_hash clhash;         /* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
                if (go_active) {
                        n = rb_last(&cl->cl_parent->vt_tree);
                        if (n != NULL) {
-                               max_cl = rb_entry(n, struct hfsc_class,vt_node);
+                               max_cl = rb_entry(n, struct hfsc_class, vt_node);
                                /*
                                 * set vt to the average of the min and max
                                 * classes.  if the parent's period didn't
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                        return NULL;
                }
 #endif
-               if ((cl = (struct hfsc_class *)res.class) == NULL) {
-                       if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
+               cl = (struct hfsc_class *)res.class;
+               if (!cl) {
+                       cl = hfsc_find_class(res.classid, sch);
+                       if (!cl)
                                break; /* filter selected invalid classid */
                        if (cl->level >= head->level)
                                break; /* filter may only point downwards */
@@ -1316,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
        return -1;
 }
 
-static inline int
+static int
 hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
 {
        if ((cl->cl_flags & HFSC_RSC) &&
@@ -1420,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
        struct hfsc_class *cl;
        u64 next_time = 0;
 
-       if ((cl = eltree_get_minel(q)) != NULL)
+       cl = eltree_get_minel(q);
+       if (cl)
                next_time = cl->cl_e;
        if (q->root.cl_cfmin != 0) {
                if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1625,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
         * find the class with the minimum deadline among
         * the eligible classes.
         */
-       if ((cl = eltree_get_mindl(q, cur_time)) != NULL) {
+       cl = eltree_get_mindl(q, cur_time);
+       if (cl) {
                realtime = 1;
        } else {
                /*
@@ -1664,7 +1663,7 @@ hfsc_dequeue(struct Qdisc *sch)
                set_passive(cl);
        }
 
-       sch->flags &= ~TCQ_F_THROTTLED;
+       qdisc_unthrottled(sch);
        qdisc_bstats_update(sch, skb);
        sch->q.qlen--;
 
index fc12fe6..e1429a8 100644 (file)
@@ -99,9 +99,10 @@ struct htb_class {
                        struct rb_root feed[TC_HTB_NUMPRIO];    /* feed trees */
                        struct rb_node *ptr[TC_HTB_NUMPRIO];    /* current class ptr */
                        /* When class changes from state 1->2 and disconnects from
-                          parent's feed then we lost ptr value and start from the
-                          first child again. Here we store classid of the
-                          last valid ptr (used when ptr is NULL). */
+                        * parent's feed then we lost ptr value and start from the
+                        * first child again. Here we store classid of the
+                        * last valid ptr (used when ptr is NULL).
+                        */
                        u32 last_ptr_id[TC_HTB_NUMPRIO];
                } inner;
        } un;
@@ -185,7 +186,7 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull
  * then finish and return direct queue.
  */
-#define HTB_DIRECT (struct htb_class*)-1
+#define HTB_DIRECT ((struct htb_class *)-1L)
 
 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
                                      int *qerr)
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
        int result;
 
        /* allow to select class by setting skb->priority to valid classid;
-          note that nfmark can be used too by attaching filter fw with no
-          rules in it */
+        * note that nfmark can be used too by attaching filter fw with no
+        * rules in it
+        */
        if (skb->priority == sch->handle)
                return HTB_DIRECT;      /* X:0 (direct flow) selected */
-       if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0)
+       cl = htb_find(skb->priority, sch);
+       if (cl && cl->level == 0)
                return cl;
 
        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
                        return NULL;
                }
 #endif
-               if ((cl = (void *)res.class) == NULL) {
+               cl = (void *)res.class;
+               if (!cl) {
                        if (res.classid == sch->handle)
                                return HTB_DIRECT;      /* X:0 (direct flow) */
-                       if ((cl = htb_find(res.classid, sch)) == NULL)
+                       cl = htb_find(res.classid, sch);
+                       if (!cl)
                                break;  /* filter selected invalid classid */
                }
                if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 
                        if (p->un.inner.feed[prio].rb_node)
                                /* parent already has its feed in use so that
-                                  reset bit in mask as parent is already ok */
+                                * reset bit in mask as parent is already ok
+                                */
                                mask &= ~(1 << prio);
 
                        htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 
                        if (p->un.inner.ptr[prio] == cl->node + prio) {
                                /* we are removing child which is pointed to from
-                                  parent feed - forget the pointer but remember
-                                  classid */
+                                * parent feed - forget the pointer but remember
+                                * classid
+                                */
                                p->un.inner.last_ptr_id[prio] = cl->common.classid;
                                p->un.inner.ptr[prio] = NULL;
                        }
@@ -663,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
                                   unsigned long start)
 {
        /* don't run for longer than 2 jiffies; 2 is used instead of
-          1 to simplify things when jiffy is going to be incremented
-          too soon */
+        * 1 to simplify things when jiffy is going to be incremented
+        * too soon
+        */
        unsigned long stop_at = start + 2;
        while (time_before(jiffies, stop_at)) {
                struct htb_class *cl;
@@ -687,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 
        /* too much load - let's continue after a break for scheduling */
        if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
-               printk(KERN_WARNING "htb: too many events!\n");
+               pr_warning("htb: too many events!\n");
                q->warned |= HTB_WARN_TOOMANYEVENTS;
        }
 
@@ -695,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
 }
 
 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
-   is no such one exists. */
+ * is no such one exists.
+ */
 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
                                              u32 id)
 {
@@ -739,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
        for (i = 0; i < 65535; i++) {
                if (!*sp->pptr && *sp->pid) {
                        /* ptr was invalidated but id is valid - try to recover
-                          the original or next ptr */
+                        * the original or next ptr
+                        */
                        *sp->pptr =
                            htb_id_find_next_upper(prio, sp->root, *sp->pid);
                }
                *sp->pid = 0;   /* ptr is valid now so that remove this hint as it
-                                  can become out of date quickly */
+                                * can become out of date quickly
+                                */
                if (!*sp->pptr) {       /* we are at right end; rewind & go up */
                        *sp->pptr = sp->root;
                        while ((*sp->pptr)->rb_left)
@@ -772,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
 }
 
 /* dequeues packet at given priority and level; call only if
-   you are sure that there is active class at prio/level */
+ * you are sure that there is active class at prio/level
+ */
 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
                                        int level)
 {
@@ -789,9 +801,10 @@ next:
                        return NULL;
 
                /* class can be empty - it is unlikely but can be true if leaf
-                  qdisc drops packets in enqueue routine or if someone used
-                  graft operation on the leaf since last dequeue;
-                  simply deactivate and skip such class */
+                * qdisc drops packets in enqueue routine or if someone used
+                * graft operation on the leaf since last dequeue;
+                * simply deactivate and skip such class
+                */
                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
                        struct htb_class *next;
                        htb_deactivate(q, cl);
@@ -831,7 +844,8 @@ next:
                                          ptr[0]) + prio);
                }
                /* this used to be after charge_class but this constelation
-                  gives us slightly better performance */
+                * gives us slightly better performance
+                */
                if (!cl->un.leaf.q->q.qlen)
                        htb_deactivate(q, cl);
                htb_charge_class(q, cl, level, skb);
@@ -852,7 +866,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
        if (skb != NULL) {
 ok:
                qdisc_bstats_update(sch, skb);
-               sch->flags &= ~TCQ_F_THROTTLED;
+               qdisc_unthrottled(sch);
                sch->q.qlen--;
                return skb;
        }
@@ -883,6 +897,7 @@ ok:
                m = ~q->row_mask[level];
                while (m != (int)(-1)) {
                        int prio = ffz(m);
+
                        m |= 1 << prio;
                        skb = htb_dequeue_tree(q, prio, level);
                        if (likely(skb != NULL))
@@ -987,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
                return err;
 
        if (tb[TCA_HTB_INIT] == NULL) {
-               printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
+               pr_err("HTB: hey probably you have bad tc tool ?\n");
                return -EINVAL;
        }
        gopt = nla_data(tb[TCA_HTB_INIT]);
        if (gopt->version != HTB_VER >> 16) {
-               printk(KERN_ERR
-                      "HTB: need tc/htb version %d (minor is %d), you have %d\n",
+               pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
                       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
                return -EINVAL;
        }
@@ -1206,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
        cancel_work_sync(&q->work);
        qdisc_watchdog_cancel(&q->watchdog);
        /* This line used to be after htb_destroy_class call below
-          and surprisingly it worked in 2.4. But it must precede it
-          because filter need its target class alive to be able to call
-          unbind_filter on it (without Oops). */
+        * and surprisingly it worked in 2.4. But it must precede it
+        * because filter need its target class alive to be able to call
+        * unbind_filter on it (without Oops).
+        */
        tcf_destroy_chain(&q->filter_list);
 
        for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1342,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 
                /* check maximal depth */
                if (parent && parent->parent && parent->parent->level < 2) {
-                       printk(KERN_ERR "htb: tree is too deep\n");
+                       pr_err("htb: tree is too deep\n");
                        goto failure;
                }
                err = -ENOBUFS;
-               if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
+               cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+               if (!cl)
                        goto failure;
 
                err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1366,8 +1382,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
                        RB_CLEAR_NODE(&cl->node[prio]);
 
                /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
-                  so that can't be used inside of sch_tree_lock
-                  -- thanks to Karlis Peisenieks */
+                * so that can't be used inside of sch_tree_lock
+                * -- thanks to Karlis Peisenieks
+                */
                new_q = qdisc_create_dflt(sch->dev_queue,
                                          &pfifo_qdisc_ops, classid);
                sch_tree_lock(sch);
@@ -1419,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        }
 
        /* it used to be a nasty bug here, we have to check that node
-          is really leaf before changing cl->un.leaf ! */
+        * is really leaf before changing cl->un.leaf !
+        */
        if (!cl->level) {
                cl->quantum = rtab->rate.rate / q->rate2quantum;
                if (!hopt->quantum && cl->quantum < 1000) {
-                       printk(KERN_WARNING
+                       pr_warning(
                               "HTB: quantum of class %X is small. Consider r2q change.\n",
                               cl->common.classid);
                        cl->quantum = 1000;
                }
                if (!hopt->quantum && cl->quantum > 200000) {
-                       printk(KERN_WARNING
+                       pr_warning(
                               "HTB: quantum of class %X is big. Consider r2q change.\n",
                               cl->common.classid);
                        cl->quantum = 200000;
@@ -1478,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
        struct htb_class *cl = htb_find(classid, sch);
 
        /*if (cl && !cl->level) return 0;
-          The line above used to be there to prevent attaching filters to
-          leaves. But at least tc_index filter uses this just to get class
-          for other reasons so that we have to allow for it.
-          ----
-          19.6.2002 As Werner explained it is ok - bind filter is just
-          another way to "lock" the class - unlike "get" this lock can
-          be broken by class during destroy IIUC.
+        * The line above used to be there to prevent attaching filters to
+        * leaves. But at least tc_index filter uses this just to get class
+        * for other reasons so that we have to allow for it.
+        * ----
+        * 19.6.2002 As Werner explained it is ok - bind filter is just
+        * another way to "lock" the class - unlike "get" this lock can
+        * be broken by class during destroy IIUC.
         */
        if (cl)
                cl->filter_cnt++;
index ecc302f..ec5cbc8 100644 (file)
@@ -61,7 +61,6 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
                                                    TC_H_MIN(ntx + 1)));
                if (qdisc == NULL)
                        goto err;
-               qdisc->flags |= TCQ_F_CAN_BYPASS;
                priv->qdiscs[ntx] = qdisc;
        }
 
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644 (file)
index 0000000..fbc6f53
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct mqprio_sched {
+       struct Qdisc            **qdiscs;
+       int hw_owned;
+};
+
+static void mqprio_destroy(struct Qdisc *sch)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       unsigned int ntx;
+
+       if (!priv->qdiscs)
+               return;
+
+       for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
+               qdisc_destroy(priv->qdiscs[ntx]);
+
+       if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+               dev->netdev_ops->ndo_setup_tc(dev, 0);
+       else
+               netdev_set_num_tc(dev, 0);
+
+       kfree(priv->qdiscs);
+}
+
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+       int i, j;
+
+       /* Verify num_tc is not out of max range */
+       if (qopt->num_tc > TC_MAX_QUEUE)
+               return -EINVAL;
+
+       /* Verify priority mapping uses valid tcs */
+       for (i = 0; i < TC_BITMASK + 1; i++) {
+               if (qopt->prio_tc_map[i] >= qopt->num_tc)
+                       return -EINVAL;
+       }
+
+       /* net_device does not support requested operation */
+       if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+               return -EINVAL;
+
+       /* if hw owned qcount and qoffset are taken from LLD so
+        * no reason to verify them here
+        */
+       if (qopt->hw)
+               return 0;
+
+       for (i = 0; i < qopt->num_tc; i++) {
+               unsigned int last = qopt->offset[i] + qopt->count[i];
+
+               /* Verify the queue count is in tx range being equal to the
+                * real_num_tx_queues indicates the last queue is in use.
+                */
+               if (qopt->offset[i] >= dev->real_num_tx_queues ||
+                   !qopt->count[i] ||
+                   last > dev->real_num_tx_queues)
+                       return -EINVAL;
+
+               /* Verify that the offset and counts do not overlap */
+               for (j = i + 1; j < qopt->num_tc; j++) {
+                       if (last > qopt->offset[j])
+                               return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       struct netdev_queue *dev_queue;
+       struct Qdisc *qdisc;
+       int i, err = -EOPNOTSUPP;
+       struct tc_mqprio_qopt *qopt = NULL;
+
+       BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
+       BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
+
+       if (sch->parent != TC_H_ROOT)
+               return -EOPNOTSUPP;
+
+       if (!netif_is_multiqueue(dev))
+               return -EOPNOTSUPP;
+
+       if (nla_len(opt) < sizeof(*qopt))
+               return -EINVAL;
+
+       qopt = nla_data(opt);
+       if (mqprio_parse_opt(dev, qopt))
+               return -EINVAL;
+
+       /* pre-allocate qdisc, attachment can't fail */
+       priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+                              GFP_KERNEL);
+       if (priv->qdiscs == NULL) {
+               err = -ENOMEM;
+               goto err;
+       }
+
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               dev_queue = netdev_get_tx_queue(dev, i);
+               qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+                                         TC_H_MAKE(TC_H_MAJ(sch->handle),
+                                                   TC_H_MIN(i + 1)));
+               if (qdisc == NULL) {
+                       err = -ENOMEM;
+                       goto err;
+               }
+               priv->qdiscs[i] = qdisc;
+       }
+
+       /* If the mqprio options indicate that hardware should own
+        * the queue mapping then run ndo_setup_tc otherwise use the
+        * supplied and verified mapping
+        */
+       if (qopt->hw) {
+               priv->hw_owned = 1;
+               err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+               if (err)
+                       goto err;
+       } else {
+               netdev_set_num_tc(dev, qopt->num_tc);
+               for (i = 0; i < qopt->num_tc; i++)
+                       netdev_set_tc_queue(dev, i,
+                                           qopt->count[i], qopt->offset[i]);
+       }
+
+       /* Always use supplied priority mappings */
+       for (i = 0; i < TC_BITMASK + 1; i++)
+               netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
+
+       sch->flags |= TCQ_F_MQROOT;
+       return 0;
+
+err:
+       mqprio_destroy(sch);
+       return err;
+}
+
+static void mqprio_attach(struct Qdisc *sch)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       struct Qdisc *qdisc;
+       unsigned int ntx;
+
+       /* Attach underlying qdisc */
+       for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+               qdisc = priv->qdiscs[ntx];
+               qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+               if (qdisc)
+                       qdisc_destroy(qdisc);
+       }
+       kfree(priv->qdiscs);
+       priv->qdiscs = NULL;
+}
+
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+                                            unsigned long cl)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+
+       if (ntx >= dev->num_tx_queues)
+               return NULL;
+       return netdev_get_tx_queue(dev, ntx);
+}
+
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+                   struct Qdisc **old)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+       if (!dev_queue)
+               return -EINVAL;
+
+       if (dev->flags & IFF_UP)
+               dev_deactivate(dev);
+
+       *old = dev_graft_qdisc(dev_queue, new);
+
+       if (dev->flags & IFF_UP)
+               dev_activate(dev);
+
+       return 0;
+}
+
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       struct mqprio_sched *priv = qdisc_priv(sch);
+       unsigned char *b = skb_tail_pointer(skb);
+       struct tc_mqprio_qopt opt;
+       struct Qdisc *qdisc;
+       unsigned int i;
+
+       sch->q.qlen = 0;
+       memset(&sch->bstats, 0, sizeof(sch->bstats));
+       memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+               spin_lock_bh(qdisc_lock(qdisc));
+               sch->q.qlen             += qdisc->q.qlen;
+               sch->bstats.bytes       += qdisc->bstats.bytes;
+               sch->bstats.packets     += qdisc->bstats.packets;
+               sch->qstats.qlen        += qdisc->qstats.qlen;
+               sch->qstats.backlog     += qdisc->qstats.backlog;
+               sch->qstats.drops       += qdisc->qstats.drops;
+               sch->qstats.requeues    += qdisc->qstats.requeues;
+               sch->qstats.overlimits  += qdisc->qstats.overlimits;
+               spin_unlock_bh(qdisc_lock(qdisc));
+       }
+
+       opt.num_tc = netdev_get_num_tc(dev);
+       memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+       opt.hw = priv->hw_owned;
+
+       for (i = 0; i < netdev_get_num_tc(dev); i++) {
+               opt.count[i] = dev->tc_to_txq[i].count;
+               opt.offset[i] = dev->tc_to_txq[i].offset;
+       }
+
+       NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+       return skb->len;
+nla_put_failure:
+       nlmsg_trim(skb, b);
+       return -1;
+}
+
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+       struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+       if (!dev_queue)
+               return NULL;
+
+       return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned int ntx = TC_H_MIN(classid);
+
+       if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
+               return 0;
+       return ntx;
+}
+
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+                        struct sk_buff *skb, struct tcmsg *tcm)
+{
+       struct net_device *dev = qdisc_dev(sch);
+
+       if (cl <= netdev_get_num_tc(dev)) {
+               tcm->tcm_parent = TC_H_ROOT;
+               tcm->tcm_info = 0;
+       } else {
+               int i;
+               struct netdev_queue *dev_queue;
+
+               dev_queue = mqprio_queue_get(sch, cl);
+               tcm->tcm_parent = 0;
+               for (i = 0; i < netdev_get_num_tc(dev); i++) {
+                       struct netdev_tc_txq tc = dev->tc_to_txq[i];
+                       int q_idx = cl - netdev_get_num_tc(dev);
+
+                       if (q_idx > tc.offset &&
+                           q_idx <= tc.offset + tc.count) {
+                               tcm->tcm_parent =
+                                       TC_H_MAKE(TC_H_MAJ(sch->handle),
+                                                 TC_H_MIN(i + 1));
+                               break;
+                       }
+               }
+               tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+       }
+       tcm->tcm_handle |= TC_H_MIN(cl);
+       return 0;
+}
+
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+                              struct gnet_dump *d)
+{
+       struct net_device *dev = qdisc_dev(sch);
+
+       if (cl <= netdev_get_num_tc(dev)) {
+               int i;
+               struct Qdisc *qdisc;
+               struct gnet_stats_queue qstats = {0};
+               struct gnet_stats_basic_packed bstats = {0};
+               struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+
+               /* Drop lock here it will be reclaimed before touching
+                * statistics this is required because the d->lock we
+                * hold here is the look on dev_queue->qdisc_sleeping
+                * also acquired below.
+                */
+               spin_unlock_bh(d->lock);
+
+               for (i = tc.offset; i < tc.offset + tc.count; i++) {
+                       qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+                       spin_lock_bh(qdisc_lock(qdisc));
+                       bstats.bytes      += qdisc->bstats.bytes;
+                       bstats.packets    += qdisc->bstats.packets;
+                       qstats.qlen       += qdisc->qstats.qlen;
+                       qstats.backlog    += qdisc->qstats.backlog;
+                       qstats.drops      += qdisc->qstats.drops;
+                       qstats.requeues   += qdisc->qstats.requeues;
+                       qstats.overlimits += qdisc->qstats.overlimits;
+                       spin_unlock_bh(qdisc_lock(qdisc));
+               }
+               /* Reclaim root sleeping lock before completing stats */
+               spin_lock_bh(d->lock);
+               if (gnet_stats_copy_basic(d, &bstats) < 0 ||
+                   gnet_stats_copy_queue(d, &qstats) < 0)
+                       return -1;
+       } else {
+               struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+               sch = dev_queue->qdisc_sleeping;
+               sch->qstats.qlen = sch->q.qlen;
+               if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+                   gnet_stats_copy_queue(d, &sch->qstats) < 0)
+                       return -1;
+       }
+       return 0;
+}
+
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+       struct net_device *dev = qdisc_dev(sch);
+       unsigned long ntx;
+
+       if (arg->stop)
+               return;
+
+       /* Walk hierarchy with a virtual class per tc */
+       arg->count = arg->skip;
+       for (ntx = arg->skip;
+            ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
+            ntx++) {
+               if (arg->fn(sch, ntx + 1, arg) < 0) {
+                       arg->stop = 1;
+                       break;
+               }
+               arg->count++;
+       }
+}
+
+static const struct Qdisc_class_ops mqprio_class_ops = {
+       .graft          = mqprio_graft,
+       .leaf           = mqprio_leaf,
+       .get            = mqprio_get,
+       .put            = mqprio_put,
+       .walk           = mqprio_walk,
+       .dump           = mqprio_dump_class,
+       .dump_stats     = mqprio_dump_class_stats,
+};
+
+struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+       .cl_ops         = &mqprio_class_ops,
+       .id             = "mqprio",
+       .priv_size      = sizeof(struct mqprio_sched),
+       .init           = mqprio_init,
+       .destroy        = mqprio_destroy,
+       .attach         = mqprio_attach,
+       .dump           = mqprio_dump,
+       .owner          = THIS_MODULE,
+};
+
+static int __init mqprio_module_init(void)
+{
+       return register_qdisc(&mqprio_qdisc_ops);
+}
+
+static void __exit mqprio_module_exit(void)
+{
+       unregister_qdisc(&mqprio_qdisc_ops);
+}
+
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");
index 436a2e7..edc1950 100644 (file)
@@ -156,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
        unsigned int len;
        struct Qdisc *qdisc;
 
-       for (band = q->bands-1; band >= 0; band--) {
+       for (band = q->bands - 1; band >= 0; band--) {
                qdisc = q->queues[band];
                if (qdisc->ops->drop) {
                        len = qdisc->ops->drop(qdisc);
@@ -265,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
        for (i = 0; i < q->max_bands; i++)
                q->queues[i] = &noop_qdisc;
 
-       err = multiq_tune(sch,opt);
+       err = multiq_tune(sch, opt);
 
        if (err)
                kfree(q->queues);
@@ -346,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
        struct multiq_sched_data *q = qdisc_priv(sch);
 
        tcm->tcm_handle |= TC_H_MIN(cl);
-       tcm->tcm_info = q->queues[cl-1]->handle;
+       tcm->tcm_info = q->queues[cl - 1]->handle;
        return 0;
 }
 
@@ -378,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
                        arg->count++;
                        continue;
                }
-               if (arg->fn(sch, band+1, arg) < 0) {
+               if (arg->fn(sch, band + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
index 6a3006b..64f0d32 100644 (file)
@@ -211,8 +211,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
        }
 
        cb = netem_skb_cb(skb);
-       if (q->gap == 0 ||              /* not doing reordering */
-           q->counter < q->gap ||      /* inside last reordering gap */
+       if (q->gap == 0 ||              /* not doing reordering */
+           q->counter < q->gap ||      /* inside last reordering gap */
            q->reorder < get_crandom(&q->reorder_cor)) {
                psched_time_t now;
                psched_tdiff_t delay;
@@ -248,7 +248,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
        return ret;
 }
 
-static unsigned int netem_drop(struct Qdiscsch)
+static unsigned int netem_drop(struct Qdisc *sch)
 {
        struct netem_sched_data *q = qdisc_priv(sch);
        unsigned int len = 0;
@@ -265,7 +265,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
        struct netem_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
 
-       if (sch->flags & TCQ_F_THROTTLED)
+       if (qdisc_is_throttled(sch))
                return NULL;
 
        skb = q->qdisc->ops->peek(q->qdisc);
index fbd710d..2a318f2 100644 (file)
@@ -22,8 +22,7 @@
 #include <net/pkt_sched.h>
 
 
-struct prio_sched_data
-{
+struct prio_sched_data {
        int bands;
        struct tcf_proto *filter_list;
        u8  prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
                if (!q->filter_list || err < 0) {
                        if (TC_H_MAJ(band))
                                band = 0;
-                       return q->queues[q->prio2band[band&TC_PRIO_MAX]];
+                       return q->queues[q->prio2band[band & TC_PRIO_MAX]];
                }
                band = res.classid;
        }
@@ -106,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
        return NULL;
 }
 
-static struct sk_buff *prio_dequeue(struct Qdiscsch)
+static struct sk_buff *prio_dequeue(struct Qdisc *sch)
 {
        struct prio_sched_data *q = qdisc_priv(sch);
        int prio;
@@ -124,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
 
 }
 
-static unsigned int prio_drop(struct Qdiscsch)
+static unsigned int prio_drop(struct Qdisc *sch)
 {
        struct prio_sched_data *q = qdisc_priv(sch);
        int prio;
@@ -143,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
 
 
 static void
-prio_reset(struct Qdiscsch)
+prio_reset(struct Qdisc *sch)
 {
        int prio;
        struct prio_sched_data *q = qdisc_priv(sch);
 
-       for (prio=0; prio<q->bands; prio++)
+       for (prio = 0; prio < q->bands; prio++)
                qdisc_reset(q->queues[prio]);
        sch->q.qlen = 0;
 }
 
 static void
-prio_destroy(struct Qdiscsch)
+prio_destroy(struct Qdisc *sch)
 {
        int prio;
        struct prio_sched_data *q = qdisc_priv(sch);
 
        tcf_destroy_chain(&q->filter_list);
-       for (prio=0; prio<q->bands; prio++)
+       for (prio = 0; prio < q->bands; prio++)
                qdisc_destroy(q->queues[prio]);
 }
 
@@ -177,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
        if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
                return -EINVAL;
 
-       for (i=0; i<=TC_PRIO_MAX; i++) {
+       for (i = 0; i <= TC_PRIO_MAX; i++) {
                if (qopt->priomap[i] >= qopt->bands)
                        return -EINVAL;
        }
@@ -186,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
        q->bands = qopt->bands;
        memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-       for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
+       for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
                struct Qdisc *child = q->queues[i];
                q->queues[i] = &noop_qdisc;
                if (child != &noop_qdisc) {
@@ -196,9 +195,10 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
        }
        sch_tree_unlock(sch);
 
-       for (i=0; i<q->bands; i++) {
+       for (i = 0; i < q->bands; i++) {
                if (q->queues[i] == &noop_qdisc) {
                        struct Qdisc *child, *old;
+
                        child = qdisc_create_dflt(sch->dev_queue,
                                                  &pfifo_qdisc_ops,
                                                  TC_H_MAKE(sch->handle, i + 1));
@@ -224,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
        struct prio_sched_data *q = qdisc_priv(sch);
        int i;
 
-       for (i=0; i<TCQ_PRIO_BANDS; i++)
+       for (i = 0; i < TCQ_PRIO_BANDS; i++)
                q->queues[i] = &noop_qdisc;
 
        if (opt == NULL) {
@@ -232,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
        } else {
                int err;
 
-               if ((err= prio_tune(sch, opt)) != 0)
+               if ((err = prio_tune(sch, opt)) != 0)
                        return err;
        }
        return 0;
@@ -245,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
        struct tc_prio_qopt opt;
 
        opt.bands = q->bands;
-       memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
+       memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
 
        NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
@@ -342,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
                        arg->count++;
                        continue;
                }
-               if (arg->fn(sch, prio+1, arg) < 0) {
+               if (arg->fn(sch, prio + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
@@ -350,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
        }
 }
 
-static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl)
+static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
 {
        struct prio_sched_data *q = qdisc_priv(sch);
 
index 9f98dbd..6649463 100644 (file)
@@ -36,8 +36,7 @@
        if RED works correctly.
  */
 
-struct red_sched_data
-{
+struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */
        unsigned char           flags;
        struct red_parms        parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
        return q->flags & TC_RED_HARDDROP;
 }
 
-static int red_enqueue(struct sk_buff *skb, struct Qdiscsch)
+static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
@@ -67,29 +66,29 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
                red_end_of_idle_period(&q->parms);
 
        switch (red_action(&q->parms, q->parms.qavg)) {
-               case RED_DONT_MARK:
-                       break;
-
-               case RED_PROB_MARK:
-                       sch->qstats.overlimits++;
-                       if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
-                               q->stats.prob_drop++;
-                               goto congestion_drop;
-                       }
-
-                       q->stats.prob_mark++;
-                       break;
-
-               case RED_HARD_MARK:
-                       sch->qstats.overlimits++;
-                       if (red_use_harddrop(q) || !red_use_ecn(q) ||
-                           !INET_ECN_set_ce(skb)) {
-                               q->stats.forced_drop++;
-                               goto congestion_drop;
-                       }
-
-                       q->stats.forced_mark++;
-                       break;
+       case RED_DONT_MARK:
+               break;
+
+       case RED_PROB_MARK:
+               sch->qstats.overlimits++;
+               if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+                       q->stats.prob_drop++;
+                       goto congestion_drop;
+               }
+
+               q->stats.prob_mark++;
+               break;
+
+       case RED_HARD_MARK:
+               sch->qstats.overlimits++;
+               if (red_use_harddrop(q) || !red_use_ecn(q) ||
+                   !INET_ECN_set_ce(skb)) {
+                       q->stats.forced_drop++;
+                       goto congestion_drop;
+               }
+
+               q->stats.forced_mark++;
+               break;
        }
 
        ret = qdisc_enqueue(skb, child);
@@ -106,7 +105,7 @@ congestion_drop:
        return NET_XMIT_CN;
 }
 
-static struct sk_buff * red_dequeue(struct Qdisc* sch)
+static struct sk_buff *red_dequeue(struct Qdisc *sch)
 {
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
@@ -123,7 +122,7 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
        return skb;
 }
 
-static struct sk_buff * red_peek(struct Qdisc* sch)
+static struct sk_buff *red_peek(struct Qdisc *sch)
 {
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
        return child->ops->peek(child);
 }
 
-static unsigned int red_drop(struct Qdiscsch)
+static unsigned int red_drop(struct Qdisc *sch)
 {
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
        return 0;
 }
 
-static void red_reset(struct Qdiscsch)
+static void red_reset(struct Qdisc *sch)
 {
        struct red_sched_data *q = qdisc_priv(sch);
 
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
        return 0;
 }
 
-static int red_init(struct Qdiscsch, struct nlattr *opt)
+static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct red_sched_data *q = qdisc_priv(sch);
 
index edea8ce..4cff442 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/ip.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -76,7 +77,8 @@
 #define SFQ_DEPTH              128 /* max number of packets per flow */
 #define SFQ_SLOTS              128 /* max number of flows */
 #define SFQ_EMPTY_SLOT         255
-#define SFQ_HASH_DIVISOR       1024
+#define SFQ_DEFAULT_HASH_DIVISOR 1024
+
 /* We use 16 bits to store allot, and want to handle packets up to 64K
  * Scale allot by 8 (1<<3) so that no overflow occurs.
  */
@@ -92,8 +94,7 @@ typedef unsigned char sfq_index;
  * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
  * are 'pointers' to dep[] array
  */
-struct sfq_head
-{
+struct sfq_head {
        sfq_index       next;
        sfq_index       prev;
 };
@@ -108,13 +109,12 @@ struct sfq_slot {
        short           allot; /* credit for this slot */
 };
 
-struct sfq_sched_data
-{
+struct sfq_sched_data {
 /* Parameters */
        int             perturb_period;
-       unsigned        quantum;        /* Allotment per round: MUST BE >= MTU */
+       unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
        int             limit;
-
+       unsigned int    divisor;        /* number of slots in hash table */
 /* Variables */
        struct tcf_proto *filter_list;
        struct timer_list perturb_timer;
@@ -122,7 +122,7 @@ struct sfq_sched_data
        sfq_index       cur_depth;      /* depth of longest slot */
        unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
        struct sfq_slot *tail;          /* current slot in round */
-       sfq_index       ht[SFQ_HASH_DIVISOR];   /* Hash table */
+       sfq_index       *ht;            /* Hash table (divisor slots) */
        struct sfq_slot slots[SFQ_SLOTS];
        struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
 };
@@ -137,12 +137,12 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
        return &q->dep[val - SFQ_SLOTS];
 }
 
-static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
+static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
-       return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
+       return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
 }
 
-static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
+static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 {
        u32 h, h2;
 
@@ -157,13 +157,13 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
                iph = ip_hdr(skb);
                h = (__force u32)iph->daddr;
                h2 = (__force u32)iph->saddr ^ iph->protocol;
-               if (iph->frag_off & htons(IP_MF|IP_OFFSET))
+               if (iph->frag_off & htons(IP_MF | IP_OFFSET))
                        break;
                poff = proto_ports_offset(iph->protocol);
                if (poff >= 0 &&
                    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
                        iph = ip_hdr(skb);
-                       h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
+                       h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
                }
                break;
        }
@@ -181,7 +181,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
                if (poff >= 0 &&
                    pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
                        iph = ipv6_hdr(skb);
-                       h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
+                       h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
                }
                break;
        }
@@ -203,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 
        if (TC_H_MAJ(skb->priority) == sch->handle &&
            TC_H_MIN(skb->priority) > 0 &&
-           TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+           TC_H_MIN(skb->priority) <= q->divisor)
                return TC_H_MIN(skb->priority);
 
        if (!q->filter_list)
@@ -221,7 +221,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                        return 0;
                }
 #endif
-               if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+               if (TC_H_MIN(res.classid) <= q->divisor)
                        return TC_H_MIN(res.classid);
        }
        return 0;
@@ -497,7 +497,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
        q->perturb_period = ctl->perturb_period * HZ;
        if (ctl->limit)
                q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-
+       if (ctl->divisor) {
+               if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)
+                       return -EINVAL;
+               q->divisor = ctl->divisor;
+       }
        qlen = sch->q.qlen;
        while (sch->q.qlen > q->limit)
                sfq_drop(sch);
@@ -515,15 +519,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct sfq_sched_data *q = qdisc_priv(sch);
+       size_t sz;
        int i;
 
        q->perturb_timer.function = sfq_perturbation;
        q->perturb_timer.data = (unsigned long)sch;
        init_timer_deferrable(&q->perturb_timer);
 
-       for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-               q->ht[i] = SFQ_EMPTY_SLOT;
-
        for (i = 0; i < SFQ_DEPTH; i++) {
                q->dep[i].next = i + SFQ_SLOTS;
                q->dep[i].prev = i + SFQ_SLOTS;
@@ -532,6 +534,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
        q->limit = SFQ_DEPTH - 1;
        q->cur_depth = 0;
        q->tail = NULL;
+       q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
        if (opt == NULL) {
                q->quantum = psched_mtu(qdisc_dev(sch));
                q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
@@ -543,10 +546,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
                        return err;
        }
 
+       sz = sizeof(q->ht[0]) * q->divisor;
+       q->ht = kmalloc(sz, GFP_KERNEL);
+       if (!q->ht && sz > PAGE_SIZE)
+               q->ht = vmalloc(sz);
+       if (!q->ht)
+               return -ENOMEM;
+       for (i = 0; i < q->divisor; i++)
+               q->ht[i] = SFQ_EMPTY_SLOT;
+
        for (i = 0; i < SFQ_SLOTS; i++) {
                slot_queue_init(&q->slots[i]);
                sfq_link(q, i);
        }
+       if (q->limit >= 1)
+               sch->flags |= TCQ_F_CAN_BYPASS;
+       else
+               sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
 }
 
@@ -557,6 +573,10 @@ static void sfq_destroy(struct Qdisc *sch)
        tcf_destroy_chain(&q->filter_list);
        q->perturb_period = 0;
        del_timer_sync(&q->perturb_timer);
+       if (is_vmalloc_addr(q->ht))
+               vfree(q->ht);
+       else
+               kfree(q->ht);
 }
 
 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -569,7 +589,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
        opt.perturb_period = q->perturb_period / HZ;
 
        opt.limit = q->limit;
-       opt.divisor = SFQ_HASH_DIVISOR;
+       opt.divisor = q->divisor;
        opt.flows = q->limit;
 
        NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -594,6 +614,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
                              u32 classid)
 {
+       /* we cannot bypass queue discipline anymore */
+       sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
 }
 
@@ -647,7 +669,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
        if (arg->stop)
                return;
 
-       for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
+       for (i = 0; i < q->divisor; i++) {
                if (q->ht[i] == SFQ_EMPTY_SLOT ||
                    arg->count < arg->skip) {
                        arg->count++;
index e931658..1dcfb52 100644 (file)
@@ -97,8 +97,7 @@
        changed the limit is not effective anymore.
 */
 
-struct tbf_sched_data
-{
+struct tbf_sched_data {
 /* Parameters */
        u32             limit;          /* Maximal length of backlog: bytes */
        u32             buffer;         /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
        struct qdisc_watchdog watchdog; /* Watchdog timer */
 };
 
-#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
-#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
+#define L2T(q, L)   qdisc_l2t((q)->R_tab, L)
+#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
 
-static int tbf_enqueue(struct sk_buff *skb, struct Qdiscsch)
+static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct tbf_sched_data *q = qdisc_priv(sch);
        int ret;
@@ -137,7 +136,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
        return NET_XMIT_SUCCESS;
 }
 
-static unsigned int tbf_drop(struct Qdiscsch)
+static unsigned int tbf_drop(struct Qdisc *sch)
 {
        struct tbf_sched_data *q = qdisc_priv(sch);
        unsigned int len = 0;
@@ -149,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
        return len;
 }
 
-static struct sk_buff *tbf_dequeue(struct Qdiscsch)
+static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
 {
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
@@ -185,7 +184,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
                        q->tokens = toks;
                        q->ptokens = ptoks;
                        sch->q.qlen--;
-                       sch->flags &= ~TCQ_F_THROTTLED;
+                       qdisc_unthrottled(sch);
                        qdisc_bstats_update(sch, skb);
                        return skb;
                }
@@ -209,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
        return NULL;
 }
 
-static void tbf_reset(struct Qdiscsch)
+static void tbf_reset(struct Qdisc *sch)
 {
        struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -227,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
        [TCA_TBF_PTAB]  = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 };
 
-static int tbf_change(struct Qdiscsch, struct nlattr *opt)
+static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 {
        int err;
        struct tbf_sched_data *q = qdisc_priv(sch);
@@ -236,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
        struct qdisc_rate_table *rtab = NULL;
        struct qdisc_rate_table *ptab = NULL;
        struct Qdisc *child = NULL;
-       int max_size,n;
+       int max_size, n;
 
        err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
        if (err < 0)
@@ -259,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
        }
 
        for (n = 0; n < 256; n++)
-               if (rtab->data[n] > qopt->buffer) break;
-       max_size = (n << qopt->rate.cell_log)-1;
+               if (rtab->data[n] > qopt->buffer)
+                       break;
+       max_size = (n << qopt->rate.cell_log) - 1;
        if (ptab) {
                int size;
 
                for (n = 0; n < 256; n++)
-                       if (ptab->data[n] > qopt->mtu) break;
-               size = (n << qopt->peakrate.cell_log)-1;
-               if (size < max_size) max_size = size;
+                       if (ptab->data[n] > qopt->mtu)
+                               break;
+               size = (n << qopt->peakrate.cell_log) - 1;
+               if (size < max_size)
+                       max_size = size;
        }
        if (max_size < 0)
                goto done;
@@ -310,7 +312,7 @@ done:
        return err;
 }
 
-static int tbf_init(struct Qdiscsch, struct nlattr *opt)
+static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct tbf_sched_data *q = qdisc_priv(sch);
 
@@ -422,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
        }
 }
 
-static const struct Qdisc_class_ops tbf_class_ops =
-{
+static const struct Qdisc_class_ops tbf_class_ops = {
        .graft          =       tbf_graft,
        .leaf           =       tbf_leaf,
        .get            =       tbf_get,
index d84e732..45cd300 100644 (file)
@@ -53,8 +53,7 @@
       which will not break load balancing, though native slave
       traffic will have the highest priority.  */
 
-struct teql_master
-{
+struct teql_master {
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
@@ -65,22 +64,21 @@ struct teql_master
        unsigned long   tx_dropped;
 };
 
-struct teql_sched_data
-{
+struct teql_sched_data {
        struct Qdisc *next;
        struct teql_master *m;
        struct neighbour *ncache;
        struct sk_buff_head q;
 };
 
-#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
+#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
 
-#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT)
+#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
 
 /* "teql*" qdisc routines */
 
 static int
-teql_enqueue(struct sk_buff *skb, struct Qdiscsch)
+teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);
@@ -96,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 }
 
 static struct sk_buff *
-teql_dequeue(struct Qdiscsch)
+teql_dequeue(struct Qdisc *sch)
 {
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct netdev_queue *dat_queue;
@@ -118,13 +116,13 @@ teql_dequeue(struct Qdisc* sch)
 }
 
 static struct sk_buff *
-teql_peek(struct Qdiscsch)
+teql_peek(struct Qdisc *sch)
 {
        /* teql is meant to be used as root qdisc */
        return NULL;
 }
 
-static __inline__ void
+static inline void
 teql_neigh_release(struct neighbour *n)
 {
        if (n)
@@ -132,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
 }
 
 static void
-teql_reset(struct Qdiscsch)
+teql_reset(struct Qdisc *sch)
 {
        struct teql_sched_data *dat = qdisc_priv(sch);
 
@@ -142,13 +140,14 @@ teql_reset(struct Qdisc* sch)
 }
 
 static void
-teql_destroy(struct Qdiscsch)
+teql_destroy(struct Qdisc *sch)
 {
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;
 
-       if ((prev = master->slaves) != NULL) {
+       prev = master->slaves;
+       if (prev) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
@@ -180,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct net_device *dev = qdisc_dev(sch);
-       struct teql_master *m = (struct teql_master*)sch->ops;
+       struct teql_master *m = (struct teql_master *)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);
 
        if (dev->hard_header_len > m->dev->hard_header_len)
@@ -291,7 +290,8 @@ restart:
        nores = 0;
        busy = 0;
 
-       if ((q = start) == NULL)
+       q = start;
+       if (!q)
                goto drop;
 
        do {
@@ -356,10 +356,10 @@ drop:
 
 static int teql_master_open(struct net_device *dev)
 {
-       struct Qdisc * q;
+       struct Qdisc *q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
-       unsigned flags = IFF_NOARP|IFF_MULTICAST;
+       unsigned int flags = IFF_NOARP | IFF_MULTICAST;
 
        if (m->slaves == NULL)
                return -EUNATCH;
@@ -427,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
                do {
                        if (new_mtu > qdisc_dev(q)->mtu)
                                return -EINVAL;
-               } while ((q=NEXT_SLAVE(q)) != m->slaves);
+               } while ((q = NEXT_SLAVE(q)) != m->slaves);
        }
 
        dev->mtu = new_mtu;
index dd419d2..d8d98d5 100644 (file)
@@ -1475,6 +1475,12 @@ restart:
                        goto out_free;
        }
 
+       if (sk_filter(other, skb) < 0) {
+               /* Toss the packet but do not return any error to the sender */
+               err = len;
+               goto out_free;
+       }
+
        unix_state_lock(other);
        err = -EPERM;
        if (!unix_may_send(sk, other))
@@ -1978,36 +1984,38 @@ static int unix_shutdown(struct socket *sock, int mode)
 
        mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
 
-       if (mode) {
-               unix_state_lock(sk);
-               sk->sk_shutdown |= mode;
-               other = unix_peer(sk);
-               if (other)
-                       sock_hold(other);
-               unix_state_unlock(sk);
-               sk->sk_state_change(sk);
-
-               if (other &&
-                       (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
-
-                       int peer_mode = 0;
-
-                       if (mode&RCV_SHUTDOWN)
-                               peer_mode |= SEND_SHUTDOWN;
-                       if (mode&SEND_SHUTDOWN)
-                               peer_mode |= RCV_SHUTDOWN;
-                       unix_state_lock(other);
-                       other->sk_shutdown |= peer_mode;
-                       unix_state_unlock(other);
-                       other->sk_state_change(other);
-                       if (peer_mode == SHUTDOWN_MASK)
-                               sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
-                       else if (peer_mode & RCV_SHUTDOWN)
-                               sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-               }
-               if (other)
-                       sock_put(other);
+       if (!mode)
+               return 0;
+
+       unix_state_lock(sk);
+       sk->sk_shutdown |= mode;
+       other = unix_peer(sk);
+       if (other)
+               sock_hold(other);
+       unix_state_unlock(sk);
+       sk->sk_state_change(sk);
+
+       if (other &&
+               (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
+
+               int peer_mode = 0;
+
+               if (mode&RCV_SHUTDOWN)
+                       peer_mode |= SEND_SHUTDOWN;
+               if (mode&SEND_SHUTDOWN)
+                       peer_mode |= RCV_SHUTDOWN;
+               unix_state_lock(other);
+               other->sk_shutdown |= peer_mode;
+               unix_state_unlock(other);
+               other->sk_state_change(other);
+               if (peer_mode == SHUTDOWN_MASK)
+                       sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
+               else if (peer_mode & RCV_SHUTDOWN)
+                       sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
        }
+       if (other)
+               sock_put(other);
+
        return 0;
 }
 
index 74944a2..788a12c 100644 (file)
@@ -59,8 +59,6 @@
 #include <asm/uaccess.h>        /* copy_to/from_user */
 #include <linux/init.h>         /* __initfunc et al. */
 
-#define KMEM_SAFETYZONE 8
-
 #define DEV_TO_SLAVE(dev)      (*((struct net_device **)netdev_priv(dev)))
 
 /*