Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
author Linus Torvalds <torvalds@g5.osdl.org>
Wed, 4 Oct 2006 15:26:19 +0000 (08:26 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Wed, 4 Oct 2006 15:26:19 +0000 (08:26 -0700)
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
  [XFRM]: BEET mode
  [TCP]: Kill warning in tcp_clean_rtx_queue().
  [NET_SCHED]: Remove old estimator implementation
  [ATM]: [zatm] always *pcr in alloc_shaper()
  [ATM]: [ambassador] Change the return type to reflect reality
  [ATM]: kmalloc to kzalloc patches for drivers/atm
  [TIPC]: fix printk warning
  [XFRM]: Clearing xfrm_policy_count[] to zero during flush is incorrect.
  [XFRM] STATE: Use destination address for src hash.
  [NEIGH]: always use hash_mask under tbl lock
  [UDP]: Fix MSG_PROBE crash
  [UDP6]: Fix flowi clobbering
  [NET_SCHED]: Revert "HTB: fix incorrect use of RB_EMPTY_NODE"
  [NETFILTER]: ebt_mark: add or/and/xor action support to mark target
  [NETFILTER]: ipt_REJECT: remove largely duplicate route_reverse function
  [NETFILTER]: Honour source routing for LVS-NAT
  [NETFILTER]: add type parameter to ip_route_me_harder
  [NETFILTER]: Kconfig: fix xt_physdev dependencies

41 files changed:
drivers/atm/adummy.c
drivers/atm/ambassador.c
drivers/atm/firestream.c
drivers/atm/he.c
drivers/atm/horizon.c
drivers/atm/idt77252.c
drivers/atm/lanai.c
drivers/atm/zatm.c
include/linux/in.h
include/linux/ip.h
include/linux/ipsec.h
include/linux/netfilter_bridge/ebt_mark_t.h
include/linux/netfilter_ipv4.h
include/linux/xfrm.h
net/bridge/netfilter/ebt_mark.c
net/core/neighbour.c
net/ipv4/Kconfig
net/ipv4/Makefile
net/ipv4/esp4.c
net/ipv4/ipcomp.c
net/ipv4/ipvs/ip_vs_core.c
net/ipv4/netfilter.c
net/ipv4/netfilter/ip_nat_standalone.c
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/tcp_input.c
net/ipv4/udp.c
net/ipv4/xfrm4_mode_beet.c [new file with mode: 0644]
net/ipv6/Kconfig
net/ipv6/Makefile
net/ipv6/ipcomp6.c
net/ipv6/udp.c
net/ipv6/xfrm6_mode_beet.c [new file with mode: 0644]
net/netfilter/Kconfig
net/sched/estimator.c [deleted file]
net/sched/sch_htb.c
net/tipc/link.c
net/xfrm/xfrm_hash.h
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c

index 6cc93de..ac2c108 100644 (file)
@@ -113,15 +113,13 @@ static int __init adummy_init(void)
 
        printk(KERN_ERR "adummy: version %s\n", DRV_VERSION);
 
-       adummy_dev = (struct adummy_dev *) kmalloc(sizeof(struct adummy_dev),
+       adummy_dev = kzalloc(sizeof(struct adummy_dev),
                                                   GFP_KERNEL);
        if (!adummy_dev) {
-               printk(KERN_ERR DEV_LABEL ": kmalloc() failed\n");
+               printk(KERN_ERR DEV_LABEL ": kzalloc() failed\n");
                err = -ENOMEM;
                goto out;
        }
-       memset(adummy_dev, 0, sizeof(struct adummy_dev));
-
        atm_dev = atm_dev_register(DEV_LABEL, &adummy_ops, -1, NULL);
        if (!atm_dev) {
                printk(KERN_ERR DEV_LABEL ": atm_dev_register() failed\n");
index 4521a24..da599e6 100644 (file)
@@ -915,8 +915,8 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id,
 
 /********** make rate (not quite as much fun as Horizon) **********/
 
-static unsigned int make_rate (unsigned int rate, rounding r,
-                              u16 * bits, unsigned int * actual) {
+static int make_rate (unsigned int rate, rounding r,
+                     u16 * bits, unsigned int * actual) {
   unsigned char exp = -1; // hush gcc
   unsigned int man = -1;  // hush gcc
   
index 38fc054..5f25e5e 100644 (file)
@@ -1784,7 +1784,7 @@ static int __devinit fs_init (struct fs_dev *dev)
                write_fs (dev, RAM, (1 << (28 - FS155_VPI_BITS - FS155_VCI_BITS)) - 1);
                dev->nchannels = FS155_NR_CHANNELS;
        }
-       dev->atm_vccs = kmalloc (dev->nchannels * sizeof (struct atm_vcc *), 
+       dev->atm_vccs = kcalloc (dev->nchannels, sizeof (struct atm_vcc *),
                                 GFP_KERNEL);
        fs_dprintk (FS_DEBUG_ALLOC, "Alloc atmvccs: %p(%Zd)\n",
                    dev->atm_vccs, dev->nchannels * sizeof (struct atm_vcc *));
@@ -1794,9 +1794,8 @@ static int __devinit fs_init (struct fs_dev *dev)
                /* XXX Clean up..... */
                return 1;
        }
-       memset (dev->atm_vccs, 0, dev->nchannels * sizeof (struct atm_vcc *));
 
-       dev->tx_inuse = kmalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
+       dev->tx_inuse = kzalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
        fs_dprintk (FS_DEBUG_ALLOC, "Alloc tx_inuse: %p(%d)\n", 
                    dev->atm_vccs, dev->nchannels / 8);
 
@@ -1805,8 +1804,6 @@ static int __devinit fs_init (struct fs_dev *dev)
                /* XXX Clean up..... */
                return 1;
        }
-       memset (dev->tx_inuse, 0, dev->nchannels / 8);
-
        /* -- RAS1 : FS155 and 50 differ. Default (0) should be OK for both */
        /* -- RAS2 : FS50 only: Default is OK. */
 
@@ -1893,14 +1890,11 @@ static int __devinit firestream_init_one (struct pci_dev *pci_dev,
        if (pci_enable_device(pci_dev)) 
                goto err_out;
 
-       fs_dev = kmalloc (sizeof (struct fs_dev), GFP_KERNEL);
+       fs_dev = kzalloc (sizeof (struct fs_dev), GFP_KERNEL);
        fs_dprintk (FS_DEBUG_ALLOC, "Alloc fs-dev: %p(%Zd)\n",
                    fs_dev, sizeof (struct fs_dev));
        if (!fs_dev)
                goto err_out;
-
-       memset (fs_dev, 0, sizeof (struct fs_dev));
-  
        atm_dev = atm_dev_register("fs", &ops, -1, NULL);
        if (!atm_dev)
                goto err_out_free_fs_dev;
index f2511b4..b22a914 100644 (file)
@@ -383,14 +383,12 @@ he_init_one(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
        }
        pci_set_drvdata(pci_dev, atm_dev);
 
-       he_dev = (struct he_dev *) kmalloc(sizeof(struct he_dev),
+       he_dev = kzalloc(sizeof(struct he_dev),
                                                        GFP_KERNEL);
        if (!he_dev) {
                err = -ENOMEM;
                goto init_one_failure;
        }
-       memset(he_dev, 0, sizeof(struct he_dev));
-
        he_dev->pci_dev = pci_dev;
        he_dev->atm_dev = atm_dev;
        he_dev->atm_dev->dev_data = he_dev;
index d1113e8..209dba1 100644 (file)
@@ -2719,7 +2719,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
                goto out_disable;
        }
 
-       dev = kmalloc(sizeof(hrz_dev), GFP_KERNEL);
+       dev = kzalloc(sizeof(hrz_dev), GFP_KERNEL);
        if (!dev) {
                // perhaps we should be nice: deregister all adapters and abort?
                PRINTD(DBG_ERR, "out of memory");
@@ -2727,8 +2727,6 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
                goto out_release;
        }
 
-       memset(dev, 0, sizeof(hrz_dev));
-
        pci_set_drvdata(pci_dev, dev);
 
        // grab IRQ and install handler - move this someplace more sensible
index b0369bb..7487f0a 100644 (file)
@@ -642,11 +642,9 @@ alloc_scq(struct idt77252_dev *card, int class)
 {
        struct scq_info *scq;
 
-       scq = (struct scq_info *) kmalloc(sizeof(struct scq_info), GFP_KERNEL);
+       scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
        if (!scq)
                return NULL;
-       memset(scq, 0, sizeof(struct scq_info));
-
        scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE,
                                         &scq->paddr);
        if (scq->base == NULL) {
@@ -2142,11 +2140,9 @@ idt77252_init_est(struct vc_map *vc, int pcr)
 {
        struct rate_estimator *est;
 
-       est = kmalloc(sizeof(struct rate_estimator), GFP_KERNEL);
+       est = kzalloc(sizeof(struct rate_estimator), GFP_KERNEL);
        if (!est)
                return NULL;
-       memset(est, 0, sizeof(*est));
-
        est->maxcps = pcr < 0 ? -pcr : pcr;
        est->cps = est->maxcps;
        est->avcps = est->cps << 5;
@@ -2451,14 +2447,12 @@ idt77252_open(struct atm_vcc *vcc)
 
        index = VPCI2VC(card, vpi, vci);
        if (!card->vcs[index]) {
-               card->vcs[index] = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
+               card->vcs[index] = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
                if (!card->vcs[index]) {
                        printk("%s: can't alloc vc in open()\n", card->name);
                        up(&card->mutex);
                        return -ENOMEM;
                }
-               memset(card->vcs[index], 0, sizeof(struct vc_map));
-
                card->vcs[index]->card = card;
                card->vcs[index]->index = index;
 
@@ -2926,13 +2920,11 @@ open_card_oam(struct idt77252_dev *card)
                for (vci = 3; vci < 5; vci++) {
                        index = VPCI2VC(card, vpi, vci);
 
-                       vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
+                       vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
                        if (!vc) {
                                printk("%s: can't alloc vc\n", card->name);
                                return -ENOMEM;
                        }
-                       memset(vc, 0, sizeof(struct vc_map));
-
                        vc->index = index;
                        card->vcs[index] = vc;
 
@@ -2995,12 +2987,11 @@ open_card_ubr0(struct idt77252_dev *card)
 {
        struct vc_map *vc;
 
-       vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
+       vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
        if (!vc) {
                printk("%s: can't alloc vc\n", card->name);
                return -ENOMEM;
        }
-       memset(vc, 0, sizeof(struct vc_map));
        card->vcs[0] = vc;
        vc->class = SCHED_UBR0;
 
@@ -3695,14 +3686,12 @@ idt77252_init_one(struct pci_dev *pcidev, const struct pci_device_id *id)
                goto err_out_disable_pdev;
        }
 
-       card = kmalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
+       card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
        if (!card) {
                printk("idt77252-%d: can't allocate private data\n", index);
                err = -ENOMEM;
                goto err_out_disable_pdev;
        }
-       memset(card, 0, sizeof(struct idt77252_dev));
-
        card->revision = revision;
        card->index = index;
        card->pcidev = pcidev;
index fe60a59..b9568e1 100644 (file)
@@ -1482,16 +1482,10 @@ static inline void vcc_table_deallocate(const struct lanai_dev *lanai)
 static inline struct lanai_vcc *new_lanai_vcc(void)
 {
        struct lanai_vcc *lvcc;
-       lvcc = (struct lanai_vcc *) kmalloc(sizeof(*lvcc), GFP_KERNEL);
+       lvcc =  kzalloc(sizeof(*lvcc), GFP_KERNEL);
        if (likely(lvcc != NULL)) {
-               lvcc->vbase = NULL;
-               lvcc->rx.atmvcc = lvcc->tx.atmvcc = NULL;
-               lvcc->nref = 0;
-               memset(&lvcc->stats, 0, sizeof lvcc->stats);
-               lvcc->rx.buf.start = lvcc->tx.buf.start = NULL;
                skb_queue_head_init(&lvcc->tx.backlog);
 #ifdef DEBUG
-               lvcc->tx.unqueue = NULL;
                lvcc->vci = -1;
 #endif
        }
index 2c65e82..083c5d3 100644 (file)
@@ -603,9 +603,8 @@ static int start_rx(struct atm_dev *dev)
 DPRINTK("start_rx\n");
        zatm_dev = ZATM_DEV(dev);
        size = sizeof(struct atm_vcc *)*zatm_dev->chans;
-       zatm_dev->rx_map = (struct atm_vcc **) kmalloc(size,GFP_KERNEL);
+       zatm_dev->rx_map =  kzalloc(size,GFP_KERNEL);
        if (!zatm_dev->rx_map) return -ENOMEM;
-       memset(zatm_dev->rx_map,0,size);
        /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */
        zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR);
        /* prepare free buffer pools */
@@ -801,6 +800,7 @@ static int alloc_shaper(struct atm_dev *dev,int *pcr,int min,int max,int ubr)
                i = m = 1;
                zatm_dev->ubr_ref_cnt++;
                zatm_dev->ubr = shaper;
+               *pcr = 0;
        }
        else {
                if (min) {
@@ -951,9 +951,8 @@ static int open_tx_first(struct atm_vcc *vcc)
        skb_queue_head_init(&zatm_vcc->tx_queue);
        init_waitqueue_head(&zatm_vcc->tx_wait);
        /* initialize ring */
-       zatm_vcc->ring = kmalloc(RING_SIZE,GFP_KERNEL);
+       zatm_vcc->ring = kzalloc(RING_SIZE,GFP_KERNEL);
        if (!zatm_vcc->ring) return -ENOMEM;
-       memset(zatm_vcc->ring,0,RING_SIZE);
        loop = zatm_vcc->ring+RING_ENTRIES*RING_WORDS;
        loop[0] = uPD98401_TXPD_V;
        loop[1] = loop[2] = 0;
index d79fc75..2619859 100644 (file)
@@ -40,6 +40,7 @@ enum {
 
   IPPROTO_ESP = 50,            /* Encapsulation Security Payload protocol */
   IPPROTO_AH = 51,             /* Authentication Header protocol       */
+  IPPROTO_BEETPH = 94,        /* IP option pseudo header for BEET */
   IPPROTO_PIM    = 103,                /* Protocol Independent Multicast       */
 
   IPPROTO_COMP   = 108,                /* Compression Header protocol */
index 6b25d36..ecee9bb 100644 (file)
@@ -80,6 +80,8 @@
 #define        IPOPT_TS_TSANDADDR      1               /* timestamps and addresses */
 #define        IPOPT_TS_PRESPEC        3               /* specified modules only */
 
+#define IPV4_BEET_PHMAXLEN 8
+
 struct iphdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
        __u8    ihl:4,
@@ -123,4 +125,11 @@ struct ip_comp_hdr {
        __be16 cpi;
 };
 
+struct ip_beet_phdr {
+       __u8 nexthdr;
+       __u8 hdrlen;
+       __u8 padlen;
+       __u8 reserved;
+};
+
 #endif /* _LINUX_IP_H */
index d3c5276..d17a630 100644 (file)
@@ -12,7 +12,8 @@
 enum {
        IPSEC_MODE_ANY          = 0,    /* We do not support this for SA */
        IPSEC_MODE_TRANSPORT    = 1,
-       IPSEC_MODE_TUNNEL       = 2
+       IPSEC_MODE_TUNNEL       = 2,
+       IPSEC_MODE_BEET         = 3
 };
 
 enum {
index 110fec6..6270f6f 100644 (file)
@@ -1,6 +1,18 @@
 #ifndef __LINUX_BRIDGE_EBT_MARK_T_H
 #define __LINUX_BRIDGE_EBT_MARK_T_H
 
+/* The target member is reused for adding new actions, the
+ * value of the real target is -1 to -NUM_STANDARD_TARGETS.
+ * For backward compatibility, the 4 lsb (2 would be enough,
+ * but let's play it safe) are kept to designate this target.
+ * The remaining bits designate the action. By making the set
+ * action 0xfffffff0, the result will look ok for older
+ * versions. [September 2006] */
+#define MARK_SET_VALUE (0xfffffff0)
+#define MARK_OR_VALUE  (0xffffffe0)
+#define MARK_AND_VALUE (0xffffffd0)
+#define MARK_XOR_VALUE (0xffffffc0)
+
 struct ebt_mark_t_info
 {
        unsigned long mark;
index ce02c98..5b63a23 100644 (file)
@@ -77,7 +77,7 @@ enum nf_ip_hook_priorities {
 #define SO_ORIGINAL_DST 80
 
 #ifdef __KERNEL__
-extern int ip_route_me_harder(struct sk_buff **pskb);
+extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
 extern int ip_xfrm_me_harder(struct sk_buff **pskb);
 extern unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
                                   unsigned int dataoff, u_int8_t protocol);
index 430afd0..8ae7f74 100644 (file)
@@ -129,7 +129,8 @@ enum
 #define XFRM_MODE_TUNNEL 1
 #define XFRM_MODE_ROUTEOPTIMIZATION 2
 #define XFRM_MODE_IN_TRIGGER 3
-#define XFRM_MODE_MAX 4
+#define XFRM_MODE_BEET 4
+#define XFRM_MODE_MAX 5
 
 /* Netlink configuration messages.  */
 enum {
index 770c0df..b54306a 100644 (file)
@@ -22,24 +22,37 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
    const void *data, unsigned int datalen)
 {
        struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
+       int action = info->target & -16;
 
-       if ((*pskb)->nfmark != info->mark)
+       if (action == MARK_SET_VALUE)
                (*pskb)->nfmark = info->mark;
+       else if (action == MARK_OR_VALUE)
+               (*pskb)->nfmark |= info->mark;
+       else if (action == MARK_AND_VALUE)
+               (*pskb)->nfmark &= info->mark;
+       else
+               (*pskb)->nfmark ^= info->mark;
 
-       return info->target;
+       return info->target | -16;
 }
 
 static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
    const struct ebt_entry *e, void *data, unsigned int datalen)
 {
        struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
+       int tmp;
 
        if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
                return -EINVAL;
-       if (BASE_CHAIN && info->target == EBT_RETURN)
+       tmp = info->target | -16;
+       if (BASE_CHAIN && tmp == EBT_RETURN)
                return -EINVAL;
        CLEAR_BASE_CHAIN_BIT;
-       if (INVALID_TARGET)
+       if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
+               return -EINVAL;
+       tmp = info->target & -16;
+       if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
+           tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
                return -EINVAL;
        return 0;
 }
index 8ce8c47..b4b4783 100644 (file)
@@ -344,12 +344,12 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 {
        struct neighbour *n;
        int key_len = tbl->key_len;
-       u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
+       u32 hash_val = tbl->hash(pkey, dev);
        
        NEIGH_CACHE_STAT_INC(tbl, lookups);
 
        read_lock_bh(&tbl->lock);
-       for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+       for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
                        neigh_hold(n);
                        NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -364,12 +364,12 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
 {
        struct neighbour *n;
        int key_len = tbl->key_len;
-       u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
+       u32 hash_val = tbl->hash(pkey, NULL);
 
        NEIGH_CACHE_STAT_INC(tbl, lookups);
 
        read_lock_bh(&tbl->lock);
-       for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+       for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
                if (!memcmp(n->primary_key, pkey, key_len)) {
                        neigh_hold(n);
                        NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -1998,12 +1998,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
        int rc, h, s_h = cb->args[1];
        int idx, s_idx = idx = cb->args[2];
 
+       read_lock_bh(&tbl->lock);
        for (h = 0; h <= tbl->hash_mask; h++) {
                if (h < s_h)
                        continue;
                if (h > s_h)
                        s_idx = 0;
-               read_lock_bh(&tbl->lock);
                for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
                        if (idx < s_idx)
                                continue;
@@ -2016,8 +2016,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                                goto out;
                        }
                }
-               read_unlock_bh(&tbl->lock);
        }
+       read_unlock_bh(&tbl->lock);
        rc = skb->len;
 out:
        cb->args[1] = h;
index d172a98..5572071 100644 (file)
@@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL
 
          If unsure, say Y.
 
+config INET_XFRM_MODE_BEET
+       tristate "IP: IPsec BEET mode"
+       default y
+       select XFRM
+       ---help---
+         Support for IPsec BEET mode.
+
+         If unsure, say Y.
+
 config INET_DIAG
        tristate "INET: socket monitoring interface"
        default y
index f66049e..15645c5 100644 (file)
@@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o
 obj-$(CONFIG_INET_ESP) += esp4.o
 obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
 obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
+obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
 obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
 obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
 obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
index 13b2936..b5c205b 100644 (file)
@@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
                 *    as per draft-ietf-ipsec-udp-encaps-06,
                 *    section 3.1.2
                 */
-               if (x->props.mode == XFRM_MODE_TRANSPORT)
+               if (x->props.mode == XFRM_MODE_TRANSPORT ||
+                   x->props.mode == XFRM_MODE_BEET)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
        }
 
@@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
 {
        struct esp_data *esp = x->data;
        u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
-
-       if (x->props.mode == XFRM_MODE_TUNNEL) {
-               mtu = ALIGN(mtu + 2, blksize);
-       } else {
-               /* The worst case. */
+       int enclen = 0;
+
+       switch (x->props.mode) {
+       case XFRM_MODE_TUNNEL:
+               mtu = ALIGN(mtu +2, blksize);
+               break;
+       default:
+       case XFRM_MODE_TRANSPORT:
+               /* The worst case */
                mtu = ALIGN(mtu + 2, 4) + blksize - 4;
+               break;
+       case XFRM_MODE_BEET:
+               /* The worst case. */
+               enclen = IPV4_BEET_PHMAXLEN;
+               mtu = ALIGN(mtu + enclen + 2, blksize);
+               break;
        }
+
        if (esp->conf.padlen)
                mtu = ALIGN(mtu, esp->conf.padlen);
 
-       return mtu + x->props.header_len + esp->auth.icv_trunc_len;
+       return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
index 2017d36..3839b70 100644 (file)
@@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 {
        struct xfrm_state *t;
+       u8 mode = XFRM_MODE_TUNNEL;
        
        t = xfrm_state_alloc();
        if (t == NULL)
@@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
        t->id.daddr.a4 = x->id.daddr.a4;
        memcpy(&t->sel, &x->sel, sizeof(t->sel));
        t->props.family = AF_INET;
-       t->props.mode = XFRM_MODE_TUNNEL;
+       if (x->props.mode == XFRM_MODE_BEET)
+               mode = x->props.mode;
+       t->props.mode = mode;
        t->props.saddr.a4 = x->props.saddr.a4;
        t->props.flags = x->props.flags;
 
index 6dee039..1445bb4 100644 (file)
@@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
        skb->nh.iph->saddr = cp->vaddr;
        ip_send_check(skb->nh.iph);
 
+       /* For policy routing, packets originating from this
+        * machine itself may be routed differently to packets
+        * passing through.  We want this packet to be routed as
+        * if it came from this machine itself.  So re-compute
+        * the routing information.
+        */
+       if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
+               goto drop;
+       skb = *pskb;
+
        IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
 
        ip_vs_out_stats(cp, skb);
index 5ac1537..e2005c6 100644 (file)
@@ -8,7 +8,7 @@
 #include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff **pskb)
+int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
 {
        struct iphdr *iph = (*pskb)->nh.iph;
        struct rtable *rt;
@@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
        struct dst_entry *odst;
        unsigned int hh_len;
 
+       if (addr_type == RTN_UNSPEC)
+               addr_type = inet_addr_type(iph->saddr);
+
        /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
         * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
         */
-       if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+       if (addr_type == RTN_LOCAL) {
                fl.nl_u.ip4_u.daddr = iph->daddr;
                fl.nl_u.ip4_u.saddr = iph->saddr;
                fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
@@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
                if (!(iph->tos == rt_info->tos
                      && iph->daddr == rt_info->daddr
                      && iph->saddr == rt_info->saddr))
-                       return ip_route_me_harder(pskb);
+                       return ip_route_me_harder(pskb, RTN_UNSPEC);
        }
        return 0;
 }
index 021395b..d85d2de 100644 (file)
@@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum,
                       ct->tuplehash[!dir].tuple.src.u.all
 #endif
                    )
-                       return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+                       if (ip_route_me_harder(pskb, RTN_UNSPEC))
+                               ret = NF_DROP;
        }
        return ret;
 }
index fd0c05e..ad0312d 100644 (file)
@@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module");
 #define DEBUGP(format, args...)
 #endif
 
-static inline struct rtable *route_reverse(struct sk_buff *skb, 
-                                          struct tcphdr *tcph, int hook)
-{
-       struct iphdr *iph = skb->nh.iph;
-       struct dst_entry *odst;
-       struct flowi fl = {};
-       struct rtable *rt;
-
-       /* We don't require ip forwarding to be enabled to be able to
-        * send a RST reply for bridged traffic. */
-       if (hook != NF_IP_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
-           || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
-          ) {
-               fl.nl_u.ip4_u.daddr = iph->saddr;
-               if (hook == NF_IP_LOCAL_IN)
-                       fl.nl_u.ip4_u.saddr = iph->daddr;
-               fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-
-               if (ip_route_output_key(&rt, &fl) != 0)
-                       return NULL;
-       } else {
-               /* non-local src, find valid iif to satisfy
-                * rp-filter when calling ip_route_input. */
-               fl.nl_u.ip4_u.daddr = iph->daddr;
-               if (ip_route_output_key(&rt, &fl) != 0)
-                       return NULL;
-
-               odst = skb->dst;
-               if (ip_route_input(skb, iph->saddr, iph->daddr,
-                                  RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-                       dst_release(&rt->u.dst);
-                       return NULL;
-               }
-               dst_release(&rt->u.dst);
-               rt = (struct rtable *)skb->dst;
-               skb->dst = odst;
-
-               fl.nl_u.ip4_u.daddr = iph->saddr;
-               fl.nl_u.ip4_u.saddr = iph->daddr;
-               fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-       }
-
-       if (rt->u.dst.error) {
-               dst_release(&rt->u.dst);
-               return NULL;
-       }
-
-       fl.proto = IPPROTO_TCP;
-       fl.fl_ip_sport = tcph->dest;
-       fl.fl_ip_dport = tcph->source;
-       security_skb_classify_flow(skb, &fl);
-
-       xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
-
-       return rt;
-}
-
 /* Send RST reply */
 static void send_reset(struct sk_buff *oldskb, int hook)
 {
        struct sk_buff *nskb;
        struct iphdr *iph = oldskb->nh.iph;
        struct tcphdr _otcph, *oth, *tcph;
-       struct rtable *rt;
        __be16 tmp_port;
        __be32 tmp_addr;
        int needs_ack;
-       int hh_len;
+       unsigned int addr_type;
 
        /* IP header checks: fragment. */
        if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
@@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
        if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
                return;
 
-       if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
-               return;
-
-       hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
        /* We need a linear, writeable skb.  We also need to expand
           headroom in case hh_len of incoming interface < hh_len of
           outgoing interface */
-       nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
+       nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
                               GFP_ATOMIC);
-       if (!nskb) {
-               dst_release(&rt->u.dst);
+       if (!nskb)
                return;
-       }
-
-       dst_release(nskb->dst);
-       nskb->dst = &rt->u.dst;
 
        /* This packet will not be the same as the other: clear nf fields */
        nf_reset(nskb);
@@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
        tcph->window = 0;
        tcph->urg_ptr = 0;
 
+       /* Set DF, id = 0 */
+       nskb->nh.iph->frag_off = htons(IP_DF);
+       nskb->nh.iph->id = 0;
+
+       addr_type = RTN_UNSPEC;
+       if (hook != NF_IP_FORWARD
+#ifdef CONFIG_BRIDGE_NETFILTER
+           || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
+#endif
+          )
+               addr_type = RTN_LOCAL;
+
+       if (ip_route_me_harder(&nskb, addr_type))
+               goto free_nskb;
+
        /* Adjust TCP checksum */
        nskb->ip_summed = CHECKSUM_NONE;
        tcph->check = 0;
@@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
                                   nskb->nh.iph->daddr,
                                   csum_partial((char *)tcph,
                                                sizeof(struct tcphdr), 0));
-
-       /* Adjust IP TTL, DF */
+       /* Adjust IP TTL */
        nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
-       /* Set DF, id = 0 */
-       nskb->nh.iph->frag_off = htons(IP_DF);
-       nskb->nh.iph->id = 0;
 
        /* Adjust IP checksum */
        nskb->nh.iph->check = 0;
index e62ea2b..b91f358 100644 (file)
@@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook,
                || (*pskb)->nfmark != nfmark
 #endif
                || (*pskb)->nh.iph->tos != tos))
-               return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+               if (ip_route_me_harder(pskb, RTN_UNSPEC))
+                       ret = NF_DROP;
 
        return ret;
 }
index 3f884ce..cf06acc 100644 (file)
@@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
        u32 pkts_acked = 0;
        void (*rtt_sample)(struct sock *sk, u32 usrtt)
                = icsk->icsk_ca_ops->rtt_sample;
-       struct timeval tv;
+       struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
 
        while ((skb = skb_peek(&sk->sk_write_queue)) &&
               skb != sk->sk_send_head) {
index 6d6142f..865d752 100644 (file)
@@ -675,6 +675,8 @@ do_append_data:
                udp_flush_pending_frames(sk);
        else if (!corkreq)
                err = udp_push_pending_frames(sk, up);
+       else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+               up->pending = 0;
        release_sock(sk);
 
 out:
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c
new file mode 100644 (file)
index 0000000..89cf59e
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ *                    Miika Komu     <miika@iki.fi>
+ *                    Herbert Xu     <herbert@gondor.apana.org.au>
+ *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
+ *                    Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ * The following fields in it shall be filled in by x->type->output:
+ *      tot_len
+ *      check
+ *
+ * If the inner header carries IP options they stay where they are and a BEET
+ * pseudo header (plus padding) is written directly in front of them, so the
+ * data handed to x->type->output is: pseudo header, padding, options, payload.
+ * This is the layout xfrm4_beet_input() parses.
+ *
+ * On exit, skb->h will be set to the start of the payload to be processed
+ * by x->type->output and skb->nh will be set to the top IP header.
+ *
+ * Always returns 0.
+ */
+static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+       struct iphdr *iph, *top_iph = NULL;
+       int hdrlen, optlen;
+
+       iph = skb->nh.iph;
+       skb->h.ipiph = iph;
+
+       /* hdrlen is the room needed for the pseudo header and its padding;
+        * it stays zero when the inner header has no options.
+        */
+       hdrlen = 0;
+       optlen = iph->ihl * 4 - sizeof(*iph);
+       if (unlikely(optlen))
+               hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
+
+       skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
+       top_iph = skb->nh.iph;
+
+       /* skb->h still points at the old header.  Skip its fixed part, but
+        * stop hdrlen bytes short of the options: the pseudo header must sit
+        * in front of the options rather than overwrite them, and exactly
+        * x->props.header_len bytes must remain between the top header and
+        * skb->h.
+        */
+       skb->h.raw += sizeof(*iph) - hdrlen;
+
+       /* Only the fixed header moves to the top; the options stay put. */
+       memmove(top_iph, iph, sizeof(*iph));
+       if (unlikely(optlen)) {
+               struct ip_beet_phdr *ph;
+
+               BUG_ON(optlen < 0);
+
+               ph = (struct ip_beet_phdr *)skb->h.raw;
+               /* ph->hdrlen counts 8-byte units covering the pseudo header,
+                * padding and options; padlen rounds that total up
+                * (assumes a 4-byte struct ip_beet_phdr - TODO confirm).
+                */
+               ph->padlen = 4 - (optlen & 4);
+               ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
+               ph->nexthdr = top_iph->protocol;
+
+               /* The top header itself is option-free and announces the
+                * pseudo header as its payload protocol.
+                */
+               top_iph->protocol = IPPROTO_BEETPH;
+               top_iph->ihl = sizeof(struct iphdr) / 4;
+       }
+
+       top_iph->saddr = x->props.saddr.a4;
+       top_iph->daddr = x->id.daddr.a4;
+
+       return 0;
+}
+
+/* Remove encapsulation header.
+ *
+ * Rebuilds the inner IPv4 header at the new skb->data: the fixed header is
+ * copied from skb->nh, the addresses are taken from the state's selector and
+ * tot_len and the header checksum are recomputed.  If the outer protocol is
+ * IPPROTO_BEETPH, the pseudo header is validated, the options it describes
+ * become part of the rebuilt header, and the protocol is restored from its
+ * nexthdr field when that is non-zero.
+ *
+ * Returns 0 on success, or -EINVAL if the pseudo header cannot be pulled or
+ * describes an invalid option length.
+ *
+ * NOTE(review): the length checks and the skb_push() are relative to
+ * skb->data while the pseudo header is read from behind skb->nh; this
+ * presumes skb->data == skb->nh.raw + sizeof(struct iphdr) on entry -
+ * confirm against the callers.
+ */
+static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+       struct iphdr *iph = skb->nh.iph;
+       int phlen = 0;
+       int optlen = 0;
+       __u8 ph_nexthdr = 0, protocol = 0;
+       int err = -EINVAL;
+
+       protocol = iph->protocol;
+
+       if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
+               struct ip_beet_phdr *ph;
+
+               if (!pskb_may_pull(skb, sizeof(*ph)))
+                       goto out;
+
+               /* pskb_may_pull() may reallocate the skb head, so pointers
+                * into the packet are only derived after it has returned.
+                */
+               ph = (struct ip_beet_phdr *)(skb->nh.iph + 1);
+
+               phlen = ph->hdrlen * 8;
+               optlen = phlen - ph->padlen - sizeof(*ph);
+               if (optlen < 0 || optlen & 3 || optlen > 250)
+                       goto out;
+
+               if (!pskb_may_pull(skb, phlen))
+                       goto out;
+
+               /* The head may have moved again; refresh before reading. */
+               ph = (struct ip_beet_phdr *)(skb->nh.iph + 1);
+               ph_nexthdr = ph->nexthdr;
+       }
+
+       /* Place the rebuilt fixed header so that it ends exactly where the
+        * options (the last optlen bytes of the pseudo header area) begin.
+        */
+       skb_push(skb, sizeof(*iph) - phlen + optlen);
+       memmove(skb->data, skb->nh.raw, sizeof(*iph));
+       skb->nh.raw = skb->data;
+
+       iph = skb->nh.iph;
+       iph->ihl = (sizeof(*iph) + optlen) / 4;
+       iph->tot_len = htons(skb->len);
+       iph->daddr = x->sel.daddr.a4;
+       iph->saddr = x->sel.saddr.a4;
+       if (ph_nexthdr)
+               iph->protocol = ph_nexthdr;
+       else
+               iph->protocol = protocol;
+       iph->check = 0;
+       iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+       err = 0;
+out:
+       return err;
+}
+
+/* BEET mode handlers for IPv4; registered by xfrm4_beet_init(). */
+static struct xfrm_mode xfrm4_beet_mode = {
+       .input = xfrm4_beet_input,
+       .output = xfrm4_beet_output,
+       .owner = THIS_MODULE,
+       .encap = XFRM_MODE_BEET,
+};
+
+/* Module init: register the BEET mode handlers for AF_INET and return the
+ * result of xfrm_register_mode().
+ */
+static int __init xfrm4_beet_init(void)
+{
+       return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
+}
+
+/* Module exit: unregister the AF_INET BEET mode handlers.  A non-zero
+ * return from xfrm_unregister_mode() is treated as fatal (BUG_ON).
+ */
+static void __exit xfrm4_beet_exit(void)
+{
+       int err;
+
+       err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
+       BUG_ON(err);
+}
+
+module_init(xfrm4_beet_init);
+module_exit(xfrm4_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
index a2d211d..a460e81 100644 (file)
@@ -136,6 +136,16 @@ config INET6_XFRM_MODE_TUNNEL
 
          If unsure, say Y.
 
+config INET6_XFRM_MODE_BEET
+       tristate "IPv6: IPsec BEET mode"
+       depends on IPV6
+       default IPV6
+       select XFRM
+       ---help---
+         Support for IPsec BEET mode.
+
+         If unsure, say Y.
+
 config INET6_XFRM_MODE_ROUTEOPTIMIZATION
        tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
        depends on IPV6 && EXPERIMENTAL
index 0213c66..87274e4 100644 (file)
@@ -26,6 +26,7 @@ obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
 obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
 obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
 obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
+obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
 obj-$(CONFIG_NETFILTER)        += netfilter/
 
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
index a2860e3..71f59f1 100644 (file)
@@ -199,6 +199,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 {
        struct xfrm_state *t = NULL;
+       u8 mode = XFRM_MODE_TUNNEL;
 
        t = xfrm_state_alloc();
        if (!t)
@@ -212,7 +213,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
        memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
        memcpy(&t->sel, &x->sel, sizeof(t->sel));
        t->props.family = AF_INET6;
-       t->props.mode = XFRM_MODE_TUNNEL;
+       if (x->props.mode == XFRM_MODE_BEET)
+               mode = x->props.mode;
+       t->props.mode = mode;
        memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
 
        if (xfrm_init_state(t))
index 9662561..e0c3934 100644 (file)
@@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
        struct in6_addr *daddr, *final_p = NULL, final;
        struct ipv6_txoptions *opt = NULL;
        struct ip6_flowlabel *flowlabel = NULL;
-       struct flowi *fl = &inet->cork.fl;
+       struct flowi fl;
        struct dst_entry *dst;
        int addr_len = msg->msg_namelen;
        int ulen = len;
@@ -626,19 +626,19 @@ do_udp_sendmsg:
        }
        ulen += sizeof(struct udphdr);
 
-       memset(fl, 0, sizeof(*fl));
+       memset(&fl, 0, sizeof(fl));
 
        if (sin6) {
                if (sin6->sin6_port == 0)
                        return -EINVAL;
 
-               fl->fl_ip_dport = sin6->sin6_port;
+               fl.fl_ip_dport = sin6->sin6_port;
                daddr = &sin6->sin6_addr;
 
                if (np->sndflow) {
-                       fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-                       if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-                               flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+                       fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+                       if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+                               flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                                if (flowlabel == NULL)
                                        return -EINVAL;
                                daddr = &flowlabel->dst;
@@ -656,32 +656,32 @@ do_udp_sendmsg:
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    sin6->sin6_scope_id &&
                    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-                       fl->oif = sin6->sin6_scope_id;
+                       fl.oif = sin6->sin6_scope_id;
        } else {
                if (sk->sk_state != TCP_ESTABLISHED)
                        return -EDESTADDRREQ;
 
-               fl->fl_ip_dport = inet->dport;
+               fl.fl_ip_dport = inet->dport;
                daddr = &np->daddr;
-               fl->fl6_flowlabel = np->flow_label;
+               fl.fl6_flowlabel = np->flow_label;
                connected = 1;
        }
 
-       if (!fl->oif)
-               fl->oif = sk->sk_bound_dev_if;
+       if (!fl.oif)
+               fl.oif = sk->sk_bound_dev_if;
 
        if (msg->msg_controllen) {
                opt = &opt_space;
                memset(opt, 0, sizeof(struct ipv6_txoptions));
                opt->tot_len = sizeof(*opt);
 
-               err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
+               err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
                if (err < 0) {
                        fl6_sock_release(flowlabel);
                        return err;
                }
-               if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-                       flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+               if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+                       flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                }
@@ -695,39 +695,39 @@ do_udp_sendmsg:
                opt = fl6_merge_options(&opt_space, flowlabel, opt);
        opt = ipv6_fixup_options(&opt_space, opt);
 
-       fl->proto = IPPROTO_UDP;
-       ipv6_addr_copy(&fl->fl6_dst, daddr);
-       if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
-               ipv6_addr_copy(&fl->fl6_src, &np->saddr);
-       fl->fl_ip_sport = inet->sport;
+       fl.proto = IPPROTO_UDP;
+       ipv6_addr_copy(&fl.fl6_dst, daddr);
+       if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
+               ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+       fl.fl_ip_sport = inet->sport;
        
        /* merge ip6_build_xmit from ip6_output */
        if (opt && opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-               ipv6_addr_copy(&final, &fl->fl6_dst);
-               ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+               ipv6_addr_copy(&final, &fl.fl6_dst);
+               ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                final_p = &final;
                connected = 0;
        }
 
-       if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
-               fl->oif = np->mcast_oif;
+       if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
+               fl.oif = np->mcast_oif;
                connected = 0;
        }
 
-       security_sk_classify_flow(sk, fl);
+       security_sk_classify_flow(sk, &fl);
 
-       err = ip6_sk_dst_lookup(sk, &dst, fl);
+       err = ip6_sk_dst_lookup(sk, &dst, &fl);
        if (err)
                goto out;
        if (final_p)
-               ipv6_addr_copy(&fl->fl6_dst, final_p);
+               ipv6_addr_copy(&fl.fl6_dst, final_p);
 
-       if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0)
+       if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                goto out;
 
        if (hlimit < 0) {
-               if (ipv6_addr_is_multicast(&fl->fl6_dst))
+               if (ipv6_addr_is_multicast(&fl.fl6_dst))
                        hlimit = np->mcast_hops;
                else
                        hlimit = np->hop_limit;
@@ -763,21 +763,23 @@ back_from_confirm:
 do_append_data:
        up->len += ulen;
        err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
-               sizeof(struct udphdr), hlimit, tclass, opt, fl,
+               sizeof(struct udphdr), hlimit, tclass, opt, &fl,
                (struct rt6_info*)dst,
                corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
        if (err)
                udp_v6_flush_pending_frames(sk);
        else if (!corkreq)
                err = udp_v6_push_pending_frames(sk, up);
+       else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+               up->pending = 0;
 
        if (dst) {
                if (connected) {
                        ip6_dst_store(sk, dst,
-                                     ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
+                                     ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
                                      &np->daddr : NULL,
 #ifdef CONFIG_IPV6_SUBTREES
-                                     ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
+                                     ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
                                      &np->saddr :
 #endif
                                      NULL);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
new file mode 100644 (file)
index 0000000..edcfffa
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
+ *
+ * Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
+ *                    Miika Komu     <miika@iki.fi>
+ *                    Herbert Xu     <herbert@gondor.apana.org.au>
+ *                    Abhinav Pathak <abhinav.pathak@hiit.fi>
+ *                    Jeff Ahrenholz <ahrenholz@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/stringify.h>
+#include <net/dsfield.h>
+#include <net/dst.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+
+/* Add encapsulation header.
+ *
+ * The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
+ * The following fields in it shall be filled in by x->type->output:
+ *     payload_len
+ *
+ * On exit, skb->h will be set to the start of the encapsulation header to be
+ * filled in by x->type->output and skb->nh will be set to the nextheader field
+ * of the extension header directly preceding the encapsulation header, or in
+ * its absence, that of the top IP header.  The value of skb->data will always
+ * point to the top IP header.
+ *
+ * NOTE(review): the final assignments below unconditionally leave skb->nh at
+ * the top header's nexthdr field and skb->h right behind the fixed IPv6
+ * header, replacing the values derived from ip6_find_1stfragopt().  Confirm
+ * that extension headers are meant to be ignored here.
+ *
+ * Always returns 0.
+ */
+static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+       struct ipv6hdr *iph, *top_iph;
+       u8 *prevhdr;
+       int hdr_len;
+
+       /* Open x->props.header_len bytes of room in front of the packet;
+        * iph keeps the position of the header before it is moved.
+        */
+       skb_push(skb, x->props.header_len);
+       iph = skb->nh.ipv6h;
+
+       /* hdr_len is presumably the length of the IPv6 header plus any
+        * extension headers that precede the encapsulation - verify against
+        * ip6_find_1stfragopt().  That many bytes slide down to the new
+        * start of the packet.
+        */
+       hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
+       skb->nh.raw = prevhdr - x->props.header_len;
+       skb->h.raw = skb->data + hdr_len;
+       memmove(skb->data, iph, hdr_len);
+
+       /* Re-point skb->nh at the moved header to obtain top_iph, then set
+        * the final positions: nh at the top header's nexthdr field, h just
+        * past the fixed IPv6 header.
+        */
+       skb->nh.raw = skb->data;
+       top_iph = skb->nh.ipv6h;
+       skb->nh.raw = &top_iph->nexthdr;
+       skb->h.ipv6h = top_iph + 1;
+
+       /* Outer addresses come from the state, not from the inner packet. */
+       ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
+       ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+
+       return 0;
+}
+
+/* Remove encapsulation header.
+ *
+ * Copies the IPv6 header found at skb->nh down to a freshly pushed slot at
+ * skb->data, keeps the link-layer header adjacent to it, and then rewrites
+ * payload_len and both addresses (taken from the state's selector).
+ *
+ * Returns 0 on success or -EINVAL if an IPv6 header's worth of data cannot
+ * be pulled.
+ */
+static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+       const int hdr_size = sizeof(struct ipv6hdr);
+       struct ipv6hdr *inner;
+       u8 *mac_dst;
+
+       if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+               return -EINVAL;
+
+       /* Relocate the network header to the new front of the packet. */
+       skb_push(skb, hdr_size);
+       memmove(skb->data, skb->nh.raw, hdr_size);
+       skb->nh.raw = skb->data;
+
+       /* Move the link-layer header so it still directly precedes it. */
+       mac_dst = skb->data - skb->mac_len;
+       skb->mac.raw = memmove(mac_dst, skb->mac.raw, skb->mac_len);
+
+       inner = skb->nh.ipv6h;
+       inner->payload_len = htons(skb->len - hdr_size);
+       ipv6_addr_copy(&inner->daddr, (struct in6_addr *) &x->sel.daddr.a6);
+       ipv6_addr_copy(&inner->saddr, (struct in6_addr *) &x->sel.saddr.a6);
+
+       return 0;
+}
+
+/* BEET mode handlers for IPv6; registered by xfrm6_beet_init(). */
+static struct xfrm_mode xfrm6_beet_mode = {
+       .input = xfrm6_beet_input,
+       .output = xfrm6_beet_output,
+       .owner = THIS_MODULE,
+       .encap = XFRM_MODE_BEET,
+};
+
+/* Module init: register the BEET mode handlers for AF_INET6 and return the
+ * result of xfrm_register_mode().
+ */
+static int __init xfrm6_beet_init(void)
+{
+       return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
+}
+
+/* Module exit: unregister the AF_INET6 BEET mode handlers.  A non-zero
+ * return from xfrm_unregister_mode() is treated as fatal (BUG_ON).
+ */
+static void __exit xfrm6_beet_exit(void)
+{
+       int err;
+
+       err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
+       BUG_ON(err);
+}
+
+module_init(xfrm6_beet_init);
+module_exit(xfrm6_beet_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
index 0a28d2c..ce94732 100644 (file)
@@ -365,7 +365,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
 
 config NETFILTER_XT_MATCH_PHYSDEV
        tristate '"physdev" match support'
-       depends on NETFILTER_XTABLES && BRIDGE_NETFILTER
+       depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
        help
          Physdev packet matching matches against the physical bridge ports
          the IP packet arrived on or will leave by.
diff --git a/net/sched/estimator.c b/net/sched/estimator.c
deleted file mode 100644 (file)
index 0ebc98e..0000000
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * net/sched/estimator.c       Simple rate estimator.
- *
- *             This program is free software; you can redistribute it and/or
- *             modify it under the terms of the GNU General Public License
- *             as published by the Free Software Foundation; either version
- *             2 of the License, or (at your option) any later version.
- *
- * Authors:    Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/jiffies.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/init.h>
-#include <net/sock.h>
-#include <net/pkt_sched.h>
-
-/*
-   This code is NOT intended to be used for statistics collection,
-   its purpose is to provide a base for statistical multiplexing
-   for controlled load service.
-   If you need only statistics, run a user level daemon which
-   periodically reads byte counters.
-
-   Unfortunately, rate estimation is not a very easy task.
-   F.e. I did not find a simple way to estimate the current peak rate
-   and even failed to formulate the problem 8)8)
-
-   So I preferred not to built an estimator into the scheduler,
-   but run this task separately.
-   Ideally, it should be kernel thread(s), but for now it runs
-   from timers, which puts apparent top bounds on the number of rated
-   flows, has minimal overhead on small, but is enough
-   to handle controlled load service, sets of aggregates.
-
-   We measure rate over A=(1<<interval) seconds and evaluate EWMA:
-
-   avrate = avrate*(1-W) + rate*W
-
-   where W is chosen as negative power of 2: W = 2^(-ewma_log)
-
-   The resulting time constant is:
-
-   T = A/(-ln(1-W))
-
-
-   NOTES.
-
-   * The stored value for avbps is scaled by 2^5, so that maximal
-     rate is ~1Gbit, avpps is scaled by 2^10.
-
-   * Minimal interval is HZ/4=250msec (it is the greatest common divisor
-     for HZ=100 and HZ=1024 8)), maximal interval
-     is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
-     are too expensive, longer ones can be implemented
-     at user level painlessly.
- */
-
-#define EST_MAX_INTERVAL       5
-
-struct qdisc_estimator
-{
-       struct qdisc_estimator  *next;
-       struct tc_stats         *stats;
-       spinlock_t              *stats_lock;
-       unsigned                interval;
-       int                     ewma_log;
-       u64                     last_bytes;
-       u32                     last_packets;
-       u32                     avpps;
-       u32                     avbps;
-};
-
-struct qdisc_estimator_head
-{
-       struct timer_list       timer;
-       struct qdisc_estimator  *list;
-};
-
-static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
-
-/* Estimator array lock */
-static DEFINE_RWLOCK(est_lock);
-
-static void est_timer(unsigned long arg)
-{
-       int idx = (int)arg;
-       struct qdisc_estimator *e;
-
-       read_lock(&est_lock);
-       for (e = elist[idx].list; e; e = e->next) {
-               struct tc_stats *st = e->stats;
-               u64 nbytes;
-               u32 npackets;
-               u32 rate;
-
-               spin_lock(e->stats_lock);
-               nbytes = st->bytes;
-               npackets = st->packets;
-               rate = (nbytes - e->last_bytes)<<(7 - idx);
-               e->last_bytes = nbytes;
-               e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
-               st->bps = (e->avbps+0xF)>>5;
-
-               rate = (npackets - e->last_packets)<<(12 - idx);
-               e->last_packets = npackets;
-               e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
-               e->stats->pps = (e->avpps+0x1FF)>>10;
-               spin_unlock(e->stats_lock);
-       }
-
-       mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
-       read_unlock(&est_lock);
-}
-
-int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
-{
-       struct qdisc_estimator *est;
-       struct tc_estimator *parm = RTA_DATA(opt);
-
-       if (RTA_PAYLOAD(opt) < sizeof(*parm))
-               return -EINVAL;
-
-       if (parm->interval < -2 || parm->interval > 3)
-               return -EINVAL;
-
-       est = kzalloc(sizeof(*est), GFP_KERNEL);
-       if (est == NULL)
-               return -ENOBUFS;
-
-       est->interval = parm->interval + 2;
-       est->stats = stats;
-       est->stats_lock = stats_lock;
-       est->ewma_log = parm->ewma_log;
-       est->last_bytes = stats->bytes;
-       est->avbps = stats->bps<<5;
-       est->last_packets = stats->packets;
-       est->avpps = stats->pps<<10;
-
-       est->next = elist[est->interval].list;
-       if (est->next == NULL) {
-               init_timer(&elist[est->interval].timer);
-               elist[est->interval].timer.data = est->interval;
-               elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
-               elist[est->interval].timer.function = est_timer;
-               add_timer(&elist[est->interval].timer);
-       }
-       write_lock_bh(&est_lock);
-       elist[est->interval].list = est;
-       write_unlock_bh(&est_lock);
-       return 0;
-}
-
-void qdisc_kill_estimator(struct tc_stats *stats)
-{
-       int idx;
-       struct qdisc_estimator *est, **pest;
-
-       for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
-               int killed = 0;
-               pest = &elist[idx].list;
-               while ((est=*pest) != NULL) {
-                       if (est->stats != stats) {
-                               pest = &est->next;
-                               continue;
-                       }
-
-                       write_lock_bh(&est_lock);
-                       *pest = est->next;
-                       write_unlock_bh(&est_lock);
-
-                       kfree(est);
-                       killed++;
-               }
-               if (killed && elist[idx].list == NULL)
-                       del_timer(&elist[idx].timer);
-       }
-}
-
-EXPORT_SYMBOL(qdisc_kill_estimator);
-EXPORT_SYMBOL(qdisc_new_estimator);
index 6c058e3..bb3ddd4 100644 (file)
@@ -391,7 +391,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
 /* If this triggers, it is a bug in this code, but it need not be fatal */
 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 {
-       if (!RB_EMPTY_NODE(rb)) {
+       if (RB_EMPTY_NODE(rb)) {
                WARN_ON(1);
        } else {
                rb_erase(rb, root);
index 693f02e..53bc8cb 100644 (file)
@@ -1666,8 +1666,9 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
                char addr_string[16];
 
                tipc_printf(TIPC_OUTPUT, "Msg seq number: %u,  ", msg_seqno(msg));
-               tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle);
-               
+               tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
+                                    (unsigned long) TIPC_SKB_CB(buf)->handle);
+
                n_ptr = l_ptr->owner->next;
                tipc_node_lock(n_ptr);
 
index 6ac4e4f..d401dc8 100644 (file)
@@ -41,17 +41,18 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
        return (h ^ (h >> 16)) & hmask;
 }
 
-static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
+static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
+                                      xfrm_address_t *saddr,
                                       unsigned short family,
                                       unsigned int hmask)
 {
        unsigned int h = family;
        switch (family) {
        case AF_INET:
-               h ^= __xfrm4_addr_hash(saddr);
+               h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
                break;
        case AF_INET6:
-               h ^= __xfrm6_addr_hash(saddr);
+               h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
                break;
        };
        return (h ^ (h >> 16)) & hmask;
index b6e2e79..2a78616 100644 (file)
@@ -778,8 +778,9 @@ void xfrm_policy_flush(u8 type)
        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
                struct xfrm_policy *pol;
                struct hlist_node *entry;
-               int i;
+               int i, killed;
 
+               killed = 0;
        again1:
                hlist_for_each_entry(pol, entry,
                                     &xfrm_policy_inexact[dir], bydst) {
@@ -790,6 +791,7 @@ void xfrm_policy_flush(u8 type)
                        write_unlock_bh(&xfrm_policy_lock);
 
                        xfrm_policy_kill(pol);
+                       killed++;
 
                        write_lock_bh(&xfrm_policy_lock);
                        goto again1;
@@ -807,13 +809,14 @@ void xfrm_policy_flush(u8 type)
                                write_unlock_bh(&xfrm_policy_lock);
 
                                xfrm_policy_kill(pol);
+                               killed++;
 
                                write_lock_bh(&xfrm_policy_lock);
                                goto again2;
                        }
                }
 
-               xfrm_policy_count[dir] = 0;
+               xfrm_policy_count[dir] -= killed;
        }
        atomic_inc(&flow_cache_genid);
        write_unlock_bh(&xfrm_policy_lock);
index f927b73..39b8bf3 100644 (file)
@@ -63,10 +63,11 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
        return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
 }
 
-static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
+static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
+                                        xfrm_address_t *saddr,
                                         unsigned short family)
 {
-       return __xfrm_src_hash(addr, family, xfrm_state_hmask);
+       return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
 }
 
 static inline unsigned int
@@ -92,7 +93,8 @@ static void xfrm_hash_transfer(struct hlist_head *list,
                                    nhashmask);
                hlist_add_head(&x->bydst, ndsttable+h);
 
-               h = __xfrm_src_hash(&x->props.saddr, x->props.family,
+               h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
+                                   x->props.family,
                                    nhashmask);
                hlist_add_head(&x->bysrc, nsrctable+h);
 
@@ -458,7 +460,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
 
 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
 {
-       unsigned int h = xfrm_src_hash(saddr, family);
+       unsigned int h = xfrm_src_hash(daddr, saddr, family);
        struct xfrm_state *x;
        struct hlist_node *entry;
 
@@ -587,7 +589,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
                if (km_query(x, tmpl, pol) == 0) {
                        x->km.state = XFRM_STATE_ACQ;
                        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-                       h = xfrm_src_hash(saddr, family);
+                       h = xfrm_src_hash(daddr, saddr, family);
                        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                        if (x->id.spi) {
                                h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
@@ -622,7 +624,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
                          x->props.reqid, x->props.family);
        hlist_add_head(&x->bydst, xfrm_state_bydst+h);
 
-       h = xfrm_src_hash(&x->props.saddr, x->props.family);
+       h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
        hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
 
        if (x->id.spi) {
@@ -748,7 +750,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
                x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
                add_timer(&x->timer);
                hlist_add_head(&x->bydst, xfrm_state_bydst+h);
-               h = xfrm_src_hash(saddr, family);
+               h = xfrm_src_hash(daddr, saddr, family);
                hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
                wake_up(&km_waitq);
        }
index c59a78d..d54b3a7 100644 (file)
@@ -211,6 +211,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
        case XFRM_MODE_TRANSPORT:
        case XFRM_MODE_TUNNEL:
        case XFRM_MODE_ROUTEOPTIMIZATION:
+       case XFRM_MODE_BEET:
                break;
 
        default: