Merge branch 'flow_key_hashing'
authorDavid S. Miller <davem@davemloft.net>
Thu, 4 Jun 2015 22:44:32 +0000 (15:44 -0700)
committerDavid S. Miller <davem@davemloft.net>
Thu, 4 Jun 2015 22:44:32 +0000 (15:44 -0700)
Tom Herbert says:

====================
net: Increase inputs to flow_keys hashing

This patch set adds new fields to the flow_keys structure and hashes
over these fields to get a better flow hash. In particular, these
patches now include hashing over the full IPv6 addresses in order
to defend against address spoofing that always results in the
same hash. The new input also includes the Ethertype, L4 protocol,
VLAN, flow label, GRE keyid, and MPLS entropy label.

In order to increase hash inputs, we switch to using jhash2
which operates on an array of u32's. jhash2 operates on multiples of
three words. The data in the hash is constructed for that, and there
are two variants for IPv4 and IPv6 addressing. For IPv4 addresses,
jhash is performed over six u32's and for IPv6 it is done over twelve.

flow_keys can store either IPv4 or IPv6 addresses (addr_proto field
is a selector). ipv6_addr_hash is no longer used to convert addresses
for setting in flow table. For legacy uses of flow keys outside of
flow_dissector the flow_get_u32_src and flow_get_u32_dst functions
have been added to get u32 representations of addresses
in flow_keys.

For flow labels we also eliminate the short circuit in flow_dissector
for non-zero flow label. The flow label is now considered additional
input to ports.

Testing: Ran netperf TCP_RR for 200 flows using IPv4 and IPv6 comparing
before the patches and with the patches. Did not detect any performance
degradation.

v2:
  - Took out MPLS entropy label. Will add this later.
v3:
  - Ensure hash start offset is a four byte boundary. Add BUILD_BUG_ON
    to check for this.
  - Fixes sparse error in GRE to get entropy from keyid.
v4:
  - Rebase to Jiri changes to generalize flow dissection
  - Support TIPC as its own address
  - Bring back MPLS entropy label dissection
  - Remove FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS

v5:
  - Minor fixes from feedback

v6:
  - Cleanup and sparse issue with flow label
  - Change keyid returned by flow_dissector to be __be32
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
12 files changed:
drivers/net/bonding/bond_main.c
drivers/net/ethernet/cisco/enic/enic_clsf.c
drivers/net/ethernet/cisco/enic/enic_ethtool.c
include/linux/skbuff.h
include/net/flow_dissector.h
include/net/ip.h
include/net/ipv6.h
include/uapi/linux/in.h
net/core/flow_dissector.c
net/ethernet/eth.c
net/sched/cls_flow.c
net/sched/cls_flower.c

index 2268438..19eb990 100644 (file)
@@ -3059,8 +3059,7 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
                        return false;
                iph = ip_hdr(skb);
-               fk->addrs.src = iph->saddr;
-               fk->addrs.dst = iph->daddr;
+               iph_to_flow_copy_v4addrs(fk, iph);
                noff += iph->ihl << 2;
                if (!ip_is_fragment(iph))
                        proto = iph->protocol;
@@ -3068,8 +3067,7 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
                        return false;
                iph6 = ipv6_hdr(skb);
-               fk->addrs.src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
-               fk->addrs.dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+               iph_to_flow_copy_v6addrs(fk, iph6);
                noff += sizeof(*iph6);
                proto = iph6->nexthdr;
        } else {
@@ -3103,7 +3101,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
                hash = bond_eth_hash(skb);
        else
                hash = (__force u32)flow.ports.ports;
-       hash ^= (__force u32)flow.addrs.dst ^ (__force u32)flow.addrs.src;
+       hash ^= (__force u32)flow_get_u32_dst(&flow) ^
+               (__force u32)flow_get_u32_src(&flow);
        hash ^= (hash >> 16);
        hash ^= (hash >> 8);
 
index a31b57a..d106186 100644 (file)
@@ -33,8 +33,8 @@ int enic_addfltr_5t(struct enic *enic, struct flow_keys *keys, u16 rq)
                return -EPROTONOSUPPORT;
        };
        data.type = FILTER_IPV4_5TUPLE;
-       data.u.ipv4.src_addr = ntohl(keys->addrs.src);
-       data.u.ipv4.dst_addr = ntohl(keys->addrs.dst);
+       data.u.ipv4.src_addr = ntohl(keys->addrs.v4addrs.src);
+       data.u.ipv4.dst_addr = ntohl(keys->addrs.v4addrs.dst);
        data.u.ipv4.src_port = ntohs(keys->ports.src);
        data.u.ipv4.dst_port = ntohs(keys->ports.dst);
        data.u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE;
@@ -158,8 +158,8 @@ static struct enic_rfs_fltr_node *htbl_key_search(struct hlist_head *h,
        struct enic_rfs_fltr_node *tpos;
 
        hlist_for_each_entry(tpos, h, node)
-               if (tpos->keys.addrs.src == k->addrs.src &&
-                   tpos->keys.addrs.dst == k->addrs.dst &&
+               if (tpos->keys.addrs.v4addrs.src == k->addrs.v4addrs.src &&
+                   tpos->keys.addrs.v4addrs.dst == k->addrs.v4addrs.dst &&
                    tpos->keys.ports.ports == k->ports.ports &&
                    tpos->keys.basic.ip_proto == k->basic.ip_proto &&
                    tpos->keys.basic.n_proto == k->basic.n_proto)
index 117c096..73874b2 100644 (file)
@@ -346,10 +346,10 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd)
                break;
        }
 
-       fsp->h_u.tcp_ip4_spec.ip4src = n->keys.addrs.src;
+       fsp->h_u.tcp_ip4_spec.ip4src = flow_get_u32_src(&n->keys);
        fsp->m_u.tcp_ip4_spec.ip4src = (__u32)~0;
 
-       fsp->h_u.tcp_ip4_spec.ip4dst = n->keys.addrs.dst;
+       fsp->h_u.tcp_ip4_spec.ip4dst = flow_get_u32_dst(&n->keys);
        fsp->m_u.tcp_ip4_spec.ip4dst = (__u32)~0;
 
        fsp->h_u.tcp_ip4_spec.psrc = n->keys.ports.src;
index 6b41c15..cc612fc 100644 (file)
@@ -1943,7 +1943,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
        if (skb_transport_header_was_set(skb))
                return;
        else if (skb_flow_dissect_flow_keys(skb, &keys))
-               skb_set_transport_header(skb, keys.basic.thoff);
+               skb_set_transport_header(skb, keys.control.thoff);
        else
                skb_set_transport_header(skb, offset_hint);
 }
index bac9c14..1a8c224 100644 (file)
@@ -6,6 +6,15 @@
 #include <linux/in6.h>
 #include <uapi/linux/if_ether.h>
 
+/**
+ * struct flow_dissector_key_control:
+ * @thoff: Transport header offset
+ */
+struct flow_dissector_key_control {
+       u16     thoff;
+       u16     addr_type;
+};
+
 /**
  * struct flow_dissector_key_basic:
  * @thoff: Transport header offset
  * @ip_proto: Transport header protocol (eg. TCP/UDP)
  */
 struct flow_dissector_key_basic {
-       u16     thoff;
        __be16  n_proto;
        u8      ip_proto;
+       u8      padding;
+};
+
+struct flow_dissector_key_tags {
+       u32     vlan_id:12,
+               flow_label:20;
+};
+
+struct flow_dissector_key_keyid {
+       __be32  keyid;
 };
 
 /**
- * struct flow_dissector_key_addrs:
- * @src: source ip address in case of IPv4
- *      For IPv6 it contains 32bit hash of src address
- * @dst: destination ip address in case of IPv4
- *      For IPv6 it contains 32bit hash of dst address
+ * struct flow_dissector_key_ipv4_addrs:
+ * @src: source ip address
+ * @dst: destination ip address
  */
-struct flow_dissector_key_addrs {
+struct flow_dissector_key_ipv4_addrs {
        /* (src,dst) must be grouped, in the same way than in IP header */
        __be32 src;
        __be32 dst;
 };
 
+/**
+ * struct flow_dissector_key_ipv6_addrs:
+ * @src: source ip address
+ * @dst: destination ip address
+ */
+struct flow_dissector_key_ipv6_addrs {
+       /* (src,dst) must be grouped, in the same way than in IP header */
+       struct in6_addr src;
+       struct in6_addr dst;
+};
+
+/**
+ * struct flow_dissector_key_tipc_addrs:
+ * @srcnode: source node address
+ */
+struct flow_dissector_key_tipc_addrs {
+       __be32 srcnode;
+};
+
+/**
+ * struct flow_dissector_key_addrs:
+ * @v4addrs: IPv4 addresses
+ * @v6addrs: IPv6 addresses
+ */
+struct flow_dissector_key_addrs {
+       union {
+               struct flow_dissector_key_ipv4_addrs v4addrs;
+               struct flow_dissector_key_ipv6_addrs v6addrs;
+               struct flow_dissector_key_tipc_addrs tipcaddrs;
+       };
+};
+
 /**
  * flow_dissector_key_tp_ports:
  *     @ports: port numbers of Transport header
@@ -47,16 +95,6 @@ struct flow_dissector_key_ports {
        };
 };
 
-/**
- * struct flow_dissector_key_ipv6_addrs:
- * @src: source ip address
- * @dst: destination ip address
- */
-struct flow_dissector_key_ipv6_addrs {
-       /* (src,dst) must be grouped, in the same way than in IP header */
-       struct in6_addr src;
-       struct in6_addr dst;
-};
 
 /**
  * struct flow_dissector_key_eth_addrs:
@@ -70,12 +108,17 @@ struct flow_dissector_key_eth_addrs {
 };
 
 enum flow_dissector_key_id {
+       FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */
        FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */
-       FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_addrs */
-       FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS, /* struct flow_dissector_key_addrs */
-       FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
+       FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
        FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
+       FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
        FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
+       FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */
+       FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */
+       FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
+       FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */
+       FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */
 
        FLOW_DISSECTOR_KEY_MAX,
 };
@@ -109,11 +152,21 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb,
 }
 
 struct flow_keys {
-       struct flow_dissector_key_addrs addrs;
-       struct flow_dissector_key_ports ports;
+       struct flow_dissector_key_control control;
+#define FLOW_KEYS_HASH_START_FIELD basic
        struct flow_dissector_key_basic basic;
+       struct flow_dissector_key_tags tags;
+       struct flow_dissector_key_keyid keyid;
+       struct flow_dissector_key_ports ports;
+       struct flow_dissector_key_addrs addrs;
 };
 
+#define FLOW_KEYS_HASH_OFFSET          \
+       offsetof(struct flow_keys, FLOW_KEYS_HASH_START_FIELD)
+
+__be32 flow_get_u32_src(const struct flow_keys *flow);
+__be32 flow_get_u32_dst(const struct flow_keys *flow);
+
 extern struct flow_dissector flow_keys_dissector;
 extern struct flow_dissector flow_keys_buf_dissector;
 
index 9b976cf..0750a18 100644 (file)
@@ -355,13 +355,30 @@ static inline __wsum inet_compute_pseudo(struct sk_buff *skb, int proto)
                                  skb->len, proto, 0);
 }
 
+/* copy IPv4 saddr & daddr to flow_keys, possibly using 64bit load/store
+ * Equivalent to :     flow->v4addrs.src = iph->saddr;
+ *                     flow->v4addrs.dst = iph->daddr;
+ */
+static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
+                                           const struct iphdr *iph)
+{
+       BUILD_BUG_ON(offsetof(typeof(flow->addrs), v4addrs.dst) !=
+                    offsetof(typeof(flow->addrs), v4addrs.src) +
+                             sizeof(flow->addrs.v4addrs.src));
+       memcpy(&flow->addrs.v4addrs, &iph->saddr, sizeof(flow->addrs.v4addrs));
+       flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+}
+
 static inline void inet_set_txhash(struct sock *sk)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct flow_keys keys;
 
-       keys.addrs.src = inet->inet_saddr;
-       keys.addrs.dst = inet->inet_daddr;
+       memset(&keys, 0, sizeof(keys));
+
+       keys.addrs.v4addrs.src = inet->inet_saddr;
+       keys.addrs.v4addrs.dst = inet->inet_daddr;
+       keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
        keys.ports.src = inet->inet_sport;
        keys.ports.dst = inet->inet_dport;
 
index 35d485c..82dbdb0 100644 (file)
@@ -692,6 +692,20 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
        return hlimit;
 }
 
+/* copy IPv6 saddr & daddr to flow_keys, possibly using 64bit load/store
+ * Equivalent to :     flow->v6addrs.src = iph->saddr;
+ *                     flow->v6addrs.dst = iph->daddr;
+ */
+static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
+                                           const struct ipv6hdr *iph)
+{
+       BUILD_BUG_ON(offsetof(typeof(flow->addrs), v6addrs.dst) !=
+                    offsetof(typeof(flow->addrs), v6addrs.src) +
+                    sizeof(flow->addrs.v6addrs.src));
+       memcpy(&flow->addrs.v6addrs, &iph->saddr, sizeof(flow->addrs.v6addrs));
+       flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 static inline void ip6_set_txhash(struct sock *sk)
 {
@@ -699,8 +713,13 @@ static inline void ip6_set_txhash(struct sock *sk)
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct flow_keys keys;
 
-       keys.addrs.src = (__force __be32)ipv6_addr_hash(&np->saddr);
-       keys.addrs.dst = (__force __be32)ipv6_addr_hash(&sk->sk_v6_daddr);
+       memset(&keys, 0, sizeof(keys));
+
+       memcpy(&keys.addrs.v6addrs.src, &np->saddr,
+              sizeof(keys.addrs.v6addrs.src));
+       memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
+              sizeof(keys.addrs.v6addrs.dst));
+       keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
        keys.ports.src = inet->inet_sport;
        keys.ports.dst = inet->inet_dport;
 
index 589ced0..641338b 100644 (file)
@@ -69,6 +69,8 @@ enum {
 #define IPPROTO_SCTP           IPPROTO_SCTP
   IPPROTO_UDPLITE = 136,       /* UDP-Lite (RFC 3828)                  */
 #define IPPROTO_UDPLITE                IPPROTO_UDPLITE
+  IPPROTO_MPLS = 137,          /* MPLS in IP (RFC 4023)                */
+#define IPPROTO_MPLS           IPPROTO_MPLS
   IPPROTO_RAW = 255,           /* Raw IP packets                       */
 #define IPPROTO_RAW            IPPROTO_RAW
   IPPROTO_MAX
index 1f2d893..77e22e4 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/ppp_defs.h>
 #include <linux/stddef.h>
 #include <linux/if_ether.h>
+#include <linux/mpls.h>
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 
@@ -57,9 +58,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
                flow_dissector->offset[key->key_id] = key->offset;
        }
 
-       /* Ensure that the dissector always includes basic key. That way
-        * we are able to avoid handling lack of it in fast path.
+       /* Ensure that the dissector always includes control and basic key.
+        * That way we are able to avoid handling lack of these in fast path.
         */
+       BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
+                                           FLOW_DISSECTOR_KEY_CONTROL));
        BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
                                            FLOW_DISSECTOR_KEY_BASIC));
 }
@@ -120,9 +123,12 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
                        void *target_container,
                        void *data, __be16 proto, int nhoff, int hlen)
 {
+       struct flow_dissector_key_control *key_control;
        struct flow_dissector_key_basic *key_basic;
        struct flow_dissector_key_addrs *key_addrs;
        struct flow_dissector_key_ports *key_ports;
+       struct flow_dissector_key_tags *key_tags;
+       struct flow_dissector_key_keyid *key_keyid;
        u8 ip_proto;
 
        if (!data) {
@@ -132,6 +138,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
                hlen = skb_headlen(skb);
        }
 
+       /* It is ensured by skb_flow_dissector_init() that control key will
+        * be always present.
+        */
+       key_control = skb_flow_dissector_target(flow_dissector,
+                                               FLOW_DISSECTOR_KEY_CONTROL,
+                                               target_container);
+
        /* It is ensured by skb_flow_dissector_init() that basic key will
         * be always present.
         */
@@ -168,10 +181,12 @@ ip:
                if (!skb_flow_dissector_uses_key(flow_dissector,
                                                 FLOW_DISSECTOR_KEY_IPV4_ADDRS))
                        break;
+
                key_addrs = skb_flow_dissector_target(flow_dissector,
-                                                     FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-                                                     target_container);
-               memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs));
+                             FLOW_DISSECTOR_KEY_IPV4_ADDRS, target_container);
+               memcpy(&key_addrs->v4addrs, &iph->saddr,
+                      sizeof(key_addrs->v4addrs));
+               key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                break;
        }
        case htons(ETH_P_IPV6): {
@@ -187,16 +202,6 @@ ipv6:
                ip_proto = iph->nexthdr;
                nhoff += sizeof(struct ipv6hdr);
 
-               if (skb_flow_dissector_uses_key(flow_dissector,
-                                               FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
-                       key_addrs = skb_flow_dissector_target(flow_dissector,
-                                                             FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
-                                                             target_container);
-
-                       key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-                       key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
-                       goto flow_label;
-               }
                if (skb_flow_dissector_uses_key(flow_dissector,
                                                FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
                        struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
@@ -206,30 +211,18 @@ ipv6:
                                                                   target_container);
 
                        memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
-                       goto flow_label;
+                       key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                }
-               break;
-flow_label:
+
                flow_label = ip6_flowlabel(iph);
                if (flow_label) {
-                       /* Awesome, IPv6 packet has a flow label so we can
-                        * use that to represent the ports without any
-                        * further dissection.
-                        */
-
-                       key_basic->n_proto = proto;
-                       key_basic->ip_proto = ip_proto;
-                       key_basic->thoff = (u16)nhoff;
-
                        if (skb_flow_dissector_uses_key(flow_dissector,
-                                                       FLOW_DISSECTOR_KEY_PORTS)) {
-                               key_ports = skb_flow_dissector_target(flow_dissector,
-                                                                     FLOW_DISSECTOR_KEY_PORTS,
-                                                                     target_container);
-                               key_ports->ports = flow_label;
+                               FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+                               key_tags = skb_flow_dissector_target(flow_dissector,
+                                                                    FLOW_DISSECTOR_KEY_FLOW_LABEL,
+                                                                    target_container);
+                               key_tags->flow_label = ntohl(flow_label);
                        }
-
-                       return true;
                }
 
                break;
@@ -243,6 +236,15 @@ flow_label:
                if (!vlan)
                        return false;
 
+               if (skb_flow_dissector_uses_key(flow_dissector,
+                                               FLOW_DISSECTOR_KEY_VLANID)) {
+                       key_tags = skb_flow_dissector_target(flow_dissector,
+                                                            FLOW_DISSECTOR_KEY_VLANID,
+                                                            target_container);
+
+                       key_tags->vlan_id = skb_vlan_tag_get_id(skb);
+               }
+
                proto = vlan->h_vlan_encapsulated_proto;
                nhoff += sizeof(*vlan);
                goto again;
@@ -275,20 +277,51 @@ flow_label:
                if (!hdr)
                        return false;
                key_basic->n_proto = proto;
-               key_basic->thoff = (u16)nhoff;
+               key_control->thoff = (u16)nhoff;
 
                if (skb_flow_dissector_uses_key(flow_dissector,
-                                               FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+                                               FLOW_DISSECTOR_KEY_TIPC_ADDRS)) {
                        key_addrs = skb_flow_dissector_target(flow_dissector,
-                                                             FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+                                                             FLOW_DISSECTOR_KEY_TIPC_ADDRS,
                                                              target_container);
-                       key_addrs->src = hdr->srcnode;
-                       key_addrs->dst = 0;
+                       key_addrs->tipcaddrs.srcnode = hdr->srcnode;
+                       key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS;
+               }
+               return true;
+       }
+
+       case htons(ETH_P_MPLS_UC):
+       case htons(ETH_P_MPLS_MC): {
+               struct mpls_label *hdr, _hdr[2];
+mpls:
+               hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+                                          hlen, &_hdr);
+               if (!hdr)
+                       return false;
+
+               if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) ==
+                    MPLS_LABEL_ENTROPY) {
+                       if (skb_flow_dissector_uses_key(flow_dissector,
+                                                       FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
+                               key_keyid = skb_flow_dissector_target(flow_dissector,
+                                                                     FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+                                                                     target_container);
+                               key_keyid->keyid = hdr[1].entry &
+                                       htonl(MPLS_LS_LABEL_MASK);
+                       }
+
+                       key_basic->n_proto = proto;
+                       key_basic->ip_proto = ip_proto;
+                       key_control->thoff = (u16)nhoff;
+
+                       return true;
                }
+
                return true;
        }
+
        case htons(ETH_P_FCOE):
-               key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+               key_control->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
                /* fall through */
        default:
                return false;
@@ -308,30 +341,47 @@ flow_label:
                 * Only look inside GRE if version zero and no
                 * routing
                 */
-               if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
-                       proto = hdr->proto;
+               if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+                       break;
+
+               proto = hdr->proto;
+               nhoff += 4;
+               if (hdr->flags & GRE_CSUM)
                        nhoff += 4;
-                       if (hdr->flags & GRE_CSUM)
-                               nhoff += 4;
-                       if (hdr->flags & GRE_KEY)
-                               nhoff += 4;
-                       if (hdr->flags & GRE_SEQ)
-                               nhoff += 4;
-                       if (proto == htons(ETH_P_TEB)) {
-                               const struct ethhdr *eth;
-                               struct ethhdr _eth;
-
-                               eth = __skb_header_pointer(skb, nhoff,
-                                                          sizeof(_eth),
-                                                          data, hlen, &_eth);
-                               if (!eth)
-                                       return false;
-                               proto = eth->h_proto;
-                               nhoff += sizeof(*eth);
+               if (hdr->flags & GRE_KEY) {
+                       const __be32 *keyid;
+                       __be32 _keyid;
+
+                       keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+                                                    data, hlen, &_keyid);
+
+                       if (!keyid)
+                               return false;
+
+                       if (skb_flow_dissector_uses_key(flow_dissector,
+                                                       FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+                               key_keyid = skb_flow_dissector_target(flow_dissector,
+                                                                     FLOW_DISSECTOR_KEY_GRE_KEYID,
+                                                                     target_container);
+                               key_keyid->keyid = *keyid;
                        }
-                       goto again;
+                       nhoff += 4;
                }
-               break;
+               if (hdr->flags & GRE_SEQ)
+                       nhoff += 4;
+               if (proto == htons(ETH_P_TEB)) {
+                       const struct ethhdr *eth;
+                       struct ethhdr _eth;
+
+                       eth = __skb_header_pointer(skb, nhoff,
+                                                  sizeof(_eth),
+                                                  data, hlen, &_eth);
+                       if (!eth)
+                               return false;
+                       proto = eth->h_proto;
+                       nhoff += sizeof(*eth);
+               }
+               goto again;
        }
        case IPPROTO_IPIP:
                proto = htons(ETH_P_IP);
@@ -339,19 +389,16 @@ flow_label:
        case IPPROTO_IPV6:
                proto = htons(ETH_P_IPV6);
                goto ipv6;
+       case IPPROTO_MPLS:
+               proto = htons(ETH_P_MPLS_UC);
+               goto mpls;
        default:
                break;
        }
 
-       /* It is ensured by skb_flow_dissector_init() that basic key will
-        * be always present.
-        */
-       key_basic = skb_flow_dissector_target(flow_dissector,
-                                             FLOW_DISSECTOR_KEY_BASIC,
-                                             target_container);
        key_basic->n_proto = proto;
        key_basic->ip_proto = ip_proto;
-       key_basic->thoff = (u16) nhoff;
+       key_control->thoff = (u16)nhoff;
 
        if (skb_flow_dissector_uses_key(flow_dissector,
                                        FLOW_DISSECTOR_KEY_PORTS)) {
@@ -372,27 +419,109 @@ static __always_inline void __flow_hash_secret_init(void)
        net_get_random_once(&hashrnd, sizeof(hashrnd));
 }
 
-static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval)
+static __always_inline u32 __flow_hash_words(u32 *words, u32 length, u32 keyval)
+{
+       return jhash2(words, length, keyval);
+}
+
+static inline void *flow_keys_hash_start(struct flow_keys *flow)
+{
+       BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
+       return (void *)flow + FLOW_KEYS_HASH_OFFSET;
+}
+
+static inline size_t flow_keys_hash_length(struct flow_keys *flow)
 {
-       return jhash_3words(a, b, c, keyval);
+       size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
+       BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
+       BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
+                    sizeof(*flow) - sizeof(flow->addrs));
+
+       switch (flow->control.addr_type) {
+       case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+               diff -= sizeof(flow->addrs.v4addrs);
+               break;
+       case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+               diff -= sizeof(flow->addrs.v6addrs);
+               break;
+       case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
+               diff -= sizeof(flow->addrs.tipcaddrs);
+               break;
+       }
+       return (sizeof(*flow) - diff) / sizeof(u32);
+}
+
+__be32 flow_get_u32_src(const struct flow_keys *flow)
+{
+       switch (flow->control.addr_type) {
+       case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+               return flow->addrs.v4addrs.src;
+       case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+               return (__force __be32)ipv6_addr_hash(
+                       &flow->addrs.v6addrs.src);
+       case FLOW_DISSECTOR_KEY_TIPC_ADDRS:
+               return flow->addrs.tipcaddrs.srcnode;
+       default:
+               return 0;
+       }
+}
+EXPORT_SYMBOL(flow_get_u32_src);
+
+__be32 flow_get_u32_dst(const struct flow_keys *flow)
+{
+       switch (flow->control.addr_type) {
+       case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+               return flow->addrs.v4addrs.dst;
+       case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+               return (__force __be32)ipv6_addr_hash(
+                       &flow->addrs.v6addrs.dst);
+       default:
+               return 0;
+       }
+}
+EXPORT_SYMBOL(flow_get_u32_dst);
+
+static inline void __flow_hash_consistentify(struct flow_keys *keys)
+{
+       int addr_diff, i;
+
+       switch (keys->control.addr_type) {
+       case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+               addr_diff = (__force u32)keys->addrs.v4addrs.dst -
+                           (__force u32)keys->addrs.v4addrs.src;
+               if ((addr_diff < 0) ||
+                   (addr_diff == 0 &&
+                    ((__force u16)keys->ports.dst <
+                     (__force u16)keys->ports.src))) {
+                       swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+                       swap(keys->ports.src, keys->ports.dst);
+               }
+               break;
+       case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+               addr_diff = memcmp(&keys->addrs.v6addrs.dst,
+                                  &keys->addrs.v6addrs.src,
+                                  sizeof(keys->addrs.v6addrs.dst));
+               if ((addr_diff < 0) ||
+                   (addr_diff == 0 &&
+                    ((__force u16)keys->ports.dst <
+                     (__force u16)keys->ports.src))) {
+                       for (i = 0; i < 4; i++)
+                               swap(keys->addrs.v6addrs.src.s6_addr32[i],
+                                    keys->addrs.v6addrs.dst.s6_addr32[i]);
+                       swap(keys->ports.src, keys->ports.dst);
+               }
+               break;
+       }
 }
 
 static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
 {
        u32 hash;
 
-       /* get a consistent hash (same value on both flow directions) */
-       if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) ||
-           (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) &&
-            ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) {
-               swap(keys->addrs.dst, keys->addrs.src);
-               swap(keys->ports.src, keys->ports.dst);
-       }
+       __flow_hash_consistentify(keys);
 
-       hash = __flow_hash_3words((__force u32)keys->addrs.dst,
-                                 (__force u32)keys->addrs.src,
-                                 (__force u32)keys->ports.ports,
-                                 keyval);
+       hash = __flow_hash_words((u32 *)flow_keys_hash_start(keys),
+                                flow_keys_hash_length(keys), keyval);
        if (!hash)
                hash = 1;
 
@@ -437,8 +566,8 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
        data->n_proto = flow->basic.n_proto;
        data->ip_proto = flow->basic.ip_proto;
        data->ports = flow->ports.ports;
-       data->src = flow->addrs.src;
-       data->dst = flow->addrs.dst;
+       data->src = flow->addrs.v4addrs.src;
+       data->dst = flow->addrs.v4addrs.dst;
 }
 EXPORT_SYMBOL(make_flow_keys_digest);
 
@@ -479,7 +608,7 @@ EXPORT_SYMBOL(skb_get_hash_perturb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
                   const struct flow_keys *keys, int hlen)
 {
-       u32 poff = keys->basic.thoff;
+       u32 poff = keys->control.thoff;
 
        switch (keys->basic.ip_proto) {
        case IPPROTO_TCP: {
@@ -542,25 +671,49 @@ u32 skb_get_poff(const struct sk_buff *skb)
 }
 
 static const struct flow_dissector_key flow_keys_dissector_keys[] = {
+       {
+               .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+               .offset = offsetof(struct flow_keys, control),
+       },
        {
                .key_id = FLOW_DISSECTOR_KEY_BASIC,
                .offset = offsetof(struct flow_keys, basic),
        },
        {
                .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-               .offset = offsetof(struct flow_keys, addrs),
+               .offset = offsetof(struct flow_keys, addrs.v4addrs),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+               .offset = offsetof(struct flow_keys, addrs.v6addrs),
        },
        {
-               .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
-               .offset = offsetof(struct flow_keys, addrs),
+               .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS,
+               .offset = offsetof(struct flow_keys, addrs.tipcaddrs),
        },
        {
                .key_id = FLOW_DISSECTOR_KEY_PORTS,
                .offset = offsetof(struct flow_keys, ports),
        },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_VLANID,
+               .offset = offsetof(struct flow_keys, tags),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
+               .offset = offsetof(struct flow_keys, tags),
+       },
+       {
+               .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+               .offset = offsetof(struct flow_keys, keyid),
+       },
 };
 
 static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+       {
+               .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+               .offset = offsetof(struct flow_keys, control),
+       },
        {
                .key_id = FLOW_DISSECTOR_KEY_BASIC,
                .offset = offsetof(struct flow_keys, basic),
index 7d0e239..77e0f0e 100644 (file)
@@ -133,7 +133,7 @@ u32 eth_get_headlen(void *data, unsigned int len)
        /* parse any remaining L2/L3 headers, check for L4 */
        if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
                                            sizeof(*eth), len))
-               return max_t(u32, keys.basic.thoff, sizeof(*eth));
+               return max_t(u32, keys.control.thoff, sizeof(*eth));
 
        /* parse for any L4 headers */
        return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
index b435992..76bc3a2 100644 (file)
@@ -68,15 +68,21 @@ static inline u32 addr_fold(void *addr)
 
 static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-       if (flow->addrs.src)
-               return ntohl(flow->addrs.src);
+       __be32 src = flow_get_u32_src(flow);
+
+       if (src)
+               return ntohl(src);
+
        return addr_fold(skb->sk);
 }
 
 static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 {
-       if (flow->addrs.dst)
-               return ntohl(flow->addrs.dst);
+       __be32 dst = flow_get_u32_dst(flow);
+
+       if (dst)
+               return ntohl(dst);
+
        return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
 
index 8c8f34e..b92d3f4 100644 (file)
 
 struct fl_flow_key {
        int     indev_ifindex;
+       struct flow_dissector_key_control control;
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_eth_addrs eth;
+       struct flow_dissector_key_addrs ipaddrs;
        union {
-               struct flow_dissector_key_addrs ipv4;
+               struct flow_dissector_key_ipv4_addrs ipv4;
                struct flow_dissector_key_ipv6_addrs ipv6;
        };
        struct flow_dissector_key_ports tp;
@@ -259,14 +261,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                               &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
                               sizeof(key->basic.ip_proto));
        }
-       if (key->basic.n_proto == htons(ETH_P_IP)) {
+       if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
                               &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
                               sizeof(key->ipv4.src));
                fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
                               &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
                               sizeof(key->ipv4.dst));
-       } else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
+       } else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
                               &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
                               sizeof(key->ipv6.src));
@@ -347,6 +349,7 @@ static void fl_init_dissector(struct cls_fl_head *head,
        struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
        size_t cnt = 0;
 
+       FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
        FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
        FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
                               FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
@@ -608,7 +611,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                            sizeof(key->basic.ip_proto)))
                goto nla_put_failure;
 
-       if (key->basic.n_proto == htons(ETH_P_IP) &&
+       if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
            (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
                             &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
                             sizeof(key->ipv4.src)) ||
@@ -616,7 +619,7 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                             &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
                             sizeof(key->ipv4.dst))))
                goto nla_put_failure;
-       else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+       else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
                 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
                                  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
                                  sizeof(key->ipv6.src)) ||