inetpeer: restore small inet_peer structures
author Eric Dumazet <eric.dumazet@gmail.com>
Wed, 16 Jun 2010 04:52:13 +0000 (04:52 +0000)
committer David S. Miller <davem@davemloft.net>
Wed, 16 Jun 2010 18:55:39 +0000 (11:55 -0700)
Addition of rcu_head to struct inet_peer added 16 bytes on 64-bit arches.

That's a bit unfortunate, since the old size was exactly 64 bytes.

This can be solved by using a union between this rcu_head and four fields
that are normally used only when a refcount is taken on inet_peer.
rcu_head is used only when refcnt == -1, right before the structure is freed.

Add an inet_peer_refcheck() function to check this assertion for a while.

We can bring back the SLAB_HWCACHE_ALIGN qualifier in kmem cache creation.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/inetpeer.h
net/ipv4/inetpeer.c
net/ipv4/route.c
net/ipv4/tcp_ipv4.c

index 6174047..417d0c8 100644 (file)
@@ -22,11 +22,21 @@ struct inet_peer {
        __u32                   dtime;          /* the time of last use of not
                                                 * referenced entries */
        atomic_t                refcnt;
-       atomic_t                rid;            /* Frag reception counter */
-       atomic_t                ip_id_count;    /* IP ID for the next packet */
-       __u32                   tcp_ts;
-       __u32                   tcp_ts_stamp;
-       struct rcu_head         rcu;
+       /*
+        * Once inet_peer is queued for deletion (refcnt == -1), following fields
+        * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+        * We can share memory with rcu_head to keep inet_peer small
+        * (less than 64 bytes)
+        */
+       union {
+               struct {
+                       atomic_t        rid;            /* Frag reception counter */
+                       atomic_t        ip_id_count;    /* IP ID for the next packet */
+                       __u32           tcp_ts;
+                       __u32           tcp_ts_stamp;
+               };
+               struct rcu_head         rcu;
+       };
 };
 
 void                   inet_initpeers(void) __init;
@@ -37,10 +47,21 @@ struct inet_peer    *inet_getpeer(__be32 daddr, int create);
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
 
+/*
+ * temporary check to make sure we don't access rid, ip_id_count, tcp_ts,
+ * tcp_ts_stamp if no refcount is taken on inet_peer
+ */
+static inline void inet_peer_refcheck(const struct inet_peer *p)
+{
+       WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
+}
+
+
 /* can be called with or without local BH being disabled */
 static inline __u16    inet_getid(struct inet_peer *p, int more)
 {
        more++;
+       inet_peer_refcheck(p);
        return atomic_add_return(more, &p->ip_id_count) - more;
 }
 
index 349249f..9ffa24b 100644 (file)
@@ -64,7 +64,7 @@
  *                usually under some other lock to prevent node disappearing
  *             dtime: unused node list lock
  *             v4daddr: unchangeable
- *             ip_id_count: idlock
+ *             ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
        peer_cachep = kmem_cache_create("inet_peer_cache",
                        sizeof(struct inet_peer),
-                       0, SLAB_PANIC,
+                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                        NULL);
 
        /* All the timers, started at system startup tend
index a291edb..03430de 100644 (file)
@@ -2881,6 +2881,7 @@ static int rt_fill_info(struct net *net,
        error = rt->dst.error;
        expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
        if (rt->peer) {
+               inet_peer_refcheck(rt->peer);
                id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
                if (rt->peer->tcp_ts_stamp) {
                        ts = rt->peer->tcp_ts;
index 7f9515c..2e41e6f 100644 (file)
@@ -204,10 +204,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
                 * when trying new connection.
                 */
-               if (peer != NULL &&
-                   (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-                       tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-                       tp->rx_opt.ts_recent = peer->tcp_ts;
+               if (peer) {
+                       inet_peer_refcheck(peer);
+                       if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+                               tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+                               tp->rx_opt.ts_recent = peer->tcp_ts;
+                       }
                }
        }
 
@@ -1351,6 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                    (dst = inet_csk_route_req(sk, req)) != NULL &&
                    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
                    peer->v4daddr == saddr) {
+                       inet_peer_refcheck(peer);
                        if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
                            (s32)(peer->tcp_ts - req->ts_recent) >
                                                        TCP_PAWS_WINDOW) {