[S390] Cleanup & optimize bitops.
authorMartin Schwidefsky <schwidefsky@de.ibm.com>
Tue, 5 Feb 2008 15:50:43 +0000 (16:50 +0100)
committerMartin Schwidefsky <schwidefsky@de.ibm.com>
Tue, 5 Feb 2008 15:50:58 +0000 (16:50 +0100)
The bitops header is now a bit shorter and easier to understand since
it uses less inline assembly. It requires some tricks to persuade the
compiler to generate decent code. The ffz/ffs functions now use the
_zb_findmap/_sb_findmap table as well.
With this cleanup the new bitops for ext4 can be implemented with a
few lines, instead of another large inline assembly.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
include/asm-s390/bitops.h

index dba6fec..95d9395 100644 (file)
@@ -440,242 +440,256 @@ __constant_test_bit(unsigned long nr, const volatile unsigned long *addr) {
  __test_bit((nr),(addr)) )
 
 /*
- * ffz = Find First Zero in word. Undefined if no zero exists,
- * so code should check against ~0UL first..
+ * Optimized find bit helper functions.
  */
-static inline unsigned long ffz(unsigned long word)
+
+/**
+ * __ffz_word_loop - find byte offset of first long != -1UL
+ * @addr: pointer to array of unsigned long
+ * @size: size of the array in bits
+ */
+static inline unsigned long __ffz_word_loop(const unsigned long *addr,
+                                           unsigned long size)
+{
+       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
+       unsigned long bytes = 0;
+
+       asm volatile(
+#ifndef __s390x__
+               "       ahi     %1,31\n"
+               "       srl     %1,5\n"
+               "0:     c       %2,0(%0,%3)\n"
+               "       jne     1f\n"
+               "       la      %0,4(%0)\n"
+               "       brct    %1,0b\n"
+               "1:\n"
+#else
+               "       aghi    %1,63\n"
+               "       srlg    %1,%1,6\n"
+               "0:     cg      %2,0(%0,%3)\n"
+               "       jne     1f\n"
+               "       la      %0,8(%0)\n"
+               "       brct    %1,0b\n"
+               "1:\n"
+#endif
+               : "+a" (bytes), "+d" (size)
+               : "d" (-1UL), "a" (addr), "m" (*(addrtype *) addr)
+               : "cc" );
+       return bytes;
+}
+
+/**
+ * __ffs_word_loop - find byte offset of first long != 0UL
+ * @addr: pointer to array of unsigned long
+ * @size: size of the array in bits
+ */
+static inline unsigned long __ffs_word_loop(const unsigned long *addr,
+                                           unsigned long size)
 {
-        unsigned long bit = 0;
+       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
+       unsigned long bytes = 0;
 
+       asm volatile(
+#ifndef __s390x__
+               "       ahi     %1,31\n"
+               "       srl     %1,5\n"
+               "0:     c       %2,0(%0,%3)\n"
+               "       jne     1f\n"
+               "       la      %0,4(%0)\n"
+               "       brct    %1,0b\n"
+               "1:\n"
+#else
+               "       aghi    %1,63\n"
+               "       srlg    %1,%1,6\n"
+               "0:     cg      %2,0(%0,%3)\n"
+               "       jne     1f\n"
+               "       la      %0,8(%0)\n"
+               "       brct    %1,0b\n"
+               "1:\n"
+#endif
+               : "+a" (bytes), "+a" (size)
+               : "d" (0UL), "a" (addr), "m" (*(addrtype *) addr)
+               : "cc" );
+       return bytes;
+}
+
+/**
+ * __ffz_word - add number of the first unset bit
+ * @nr: base value the bit number is added to
+ * @word: the word that is searched for unset bits
+ */
+static inline unsigned long __ffz_word(unsigned long nr, unsigned long word)
+{
 #ifdef __s390x__
        if (likely((word & 0xffffffff) == 0xffffffff)) {
                word >>= 32;
-               bit += 32;
+               nr += 32;
        }
 #endif
        if (likely((word & 0xffff) == 0xffff)) {
                word >>= 16;
-               bit += 16;
+               nr += 16;
        }
        if (likely((word & 0xff) == 0xff)) {
                word >>= 8;
-               bit += 8;
+               nr += 8;
        }
-       return bit + _zb_findmap[word & 0xff];
+       return nr + _zb_findmap[(unsigned char) word];
 }
 
-/*
- * __ffs = find first bit in word. Undefined if no bit exists,
- * so code should check against 0UL first..
+/**
+ * __ffs_word - add number of the first set bit
+ * @nr: base value the bit number is added to
+ * @word: the word that is searched for set bits
  */
-static inline unsigned long __ffs (unsigned long word)
+static inline unsigned long __ffs_word(unsigned long nr, unsigned long word)
 {
-       unsigned long bit = 0;
-
 #ifdef __s390x__
        if (likely((word & 0xffffffff) == 0)) {
                word >>= 32;
-               bit += 32;
+               nr += 32;
        }
 #endif
        if (likely((word & 0xffff) == 0)) {
                word >>= 16;
-               bit += 16;
+               nr += 16;
        }
        if (likely((word & 0xff) == 0)) {
                word >>= 8;
-               bit += 8;
+               nr += 8;
        }
-       return bit + _sb_findmap[word & 0xff];
+       return nr + _sb_findmap[(unsigned char) word];
 }
 
-/*
- * Find-bit routines..
- */
 
-#ifndef __s390x__
+/**
+ * __load_ulong_be - load big endian unsigned long
+ * @p: pointer to array of unsigned long
+ * @offset: byte offset of source value in the array
+ */
+static inline unsigned long __load_ulong_be(const unsigned long *p,
+                                           unsigned long offset)
+{
+       p = (unsigned long *)((unsigned long) p + offset);
+       return *p;
+}
 
-static inline int
-find_first_zero_bit(const unsigned long * addr, unsigned long size)
+/**
+ * __load_ulong_le - load little endian unsigned long
+ * @p: pointer to array of unsigned long
+ * @offset: byte offset of source value in the array
+ */
+static inline unsigned long __load_ulong_le(const unsigned long *p,
+                                           unsigned long offset)
 {
-       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-       unsigned long cmp, count;
-        unsigned int res;
+       unsigned long word;
 
-        if (!size)
-                return 0;
+       p = (unsigned long *)((unsigned long) p + offset);
+#ifndef __s390x__
        asm volatile(
-               "       lhi     %1,-1\n"
-               "       lr      %2,%3\n"
-               "       slr     %0,%0\n"
-               "       ahi     %2,31\n"
-               "       srl     %2,5\n"
-               "0:     c       %1,0(%0,%4)\n"
-               "       jne     1f\n"
-               "       la      %0,4(%0)\n"
-               "       brct    %2,0b\n"
-               "       lr      %0,%3\n"
-               "       j       4f\n"
-               "1:     l       %2,0(%0,%4)\n"
-               "       sll     %0,3\n"
-               "       lhi     %1,0xff\n"
-               "       tml     %2,0xffff\n"
-               "       jno     2f\n"
-               "       ahi     %0,16\n"
-               "       srl     %2,16\n"
-               "2:     tml     %2,0x00ff\n"
-               "       jno     3f\n"
-               "       ahi     %0,8\n"
-               "       srl     %2,8\n"
-               "3:     nr      %2,%1\n"
-               "       ic      %2,0(%2,%5)\n"
-               "       alr     %0,%2\n"
-               "4:"
-               : "=&a" (res), "=&d" (cmp), "=&a" (count)
-               : "a" (size), "a" (addr), "a" (&_zb_findmap),
-                 "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
+               "       ic      %0,0(%1)\n"
+               "       icm     %0,2,1(%1)\n"
+               "       icm     %0,4,2(%1)\n"
+               "       icm     %0,8,3(%1)"
+               : "=&d" (word) : "a" (p), "m" (*p) : "cc");
+#else
+       asm volatile(
+               "       lrvg    %0,%1"
+               : "=d" (word) : "m" (*p) );
+#endif
+       return word;
 }
 
-static inline int
-find_first_bit(const unsigned long * addr, unsigned long size)
+/*
+ * The various find bit functions.
+ */
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static inline unsigned long ffz(unsigned long word)
 {
-       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-       unsigned long cmp, count;
-        unsigned int res;
+       return __ffz_word(0, word);
+}
 
-        if (!size)
-                return 0;
-       asm volatile(
-               "       slr     %1,%1\n"
-               "       lr      %2,%3\n"
-               "       slr     %0,%0\n"
-               "       ahi     %2,31\n"
-               "       srl     %2,5\n"
-               "0:     c       %1,0(%0,%4)\n"
-               "       jne     1f\n"
-               "       la      %0,4(%0)\n"
-               "       brct    %2,0b\n"
-               "       lr      %0,%3\n"
-               "       j       4f\n"
-               "1:     l       %2,0(%0,%4)\n"
-               "       sll     %0,3\n"
-               "       lhi     %1,0xff\n"
-               "       tml     %2,0xffff\n"
-               "       jnz     2f\n"
-               "       ahi     %0,16\n"
-               "       srl     %2,16\n"
-               "2:     tml     %2,0x00ff\n"
-               "       jnz     3f\n"
-               "       ahi     %0,8\n"
-               "       srl     %2,8\n"
-               "3:     nr      %2,%1\n"
-               "       ic      %2,0(%2,%5)\n"
-               "       alr     %0,%2\n"
-               "4:"
-               : "=&a" (res), "=&d" (cmp), "=&a" (count)
-               : "a" (size), "a" (addr), "a" (&_sb_findmap),
-                 "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs (unsigned long word)
+{
+       return __ffs_word(0, word);
 }
 
-#else /* __s390x__ */
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+       if (!x)
+               return 0;
+       return __ffs_word(1, x);
+}
 
-static inline unsigned long
-find_first_zero_bit(const unsigned long * addr, unsigned long size)
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static inline unsigned long find_first_zero_bit(const unsigned long *addr,
+                                               unsigned long size)
 {
-       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-        unsigned long res, cmp, count;
+       unsigned long bytes, bits;
 
         if (!size)
                 return 0;
-       asm volatile(
-               "       lghi    %1,-1\n"
-               "       lgr     %2,%3\n"
-               "       slgr    %0,%0\n"
-               "       aghi    %2,63\n"
-               "       srlg    %2,%2,6\n"
-               "0:     cg      %1,0(%0,%4)\n"
-               "       jne     1f\n"
-               "       la      %0,8(%0)\n"
-               "       brct    %2,0b\n"
-               "       lgr     %0,%3\n"
-               "       j       5f\n"
-               "1:     lg      %2,0(%0,%4)\n"
-               "       sllg    %0,%0,3\n"
-               "       clr     %2,%1\n"
-               "       jne     2f\n"
-               "       aghi    %0,32\n"
-               "       srlg    %2,%2,32\n"
-               "2:     lghi    %1,0xff\n"
-               "       tmll    %2,0xffff\n"
-               "       jno     3f\n"
-               "       aghi    %0,16\n"
-               "       srl     %2,16\n"
-               "3:     tmll    %2,0x00ff\n"
-               "       jno     4f\n"
-               "       aghi    %0,8\n"
-               "       srl     %2,8\n"
-               "4:     ngr     %2,%1\n"
-               "       ic      %2,0(%2,%5)\n"
-               "       algr    %0,%2\n"
-               "5:"
-               : "=&a" (res), "=&d" (cmp), "=&a" (count)
-               : "a" (size), "a" (addr), "a" (&_zb_findmap),
-                 "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
-}
-
-static inline unsigned long
-find_first_bit(const unsigned long * addr, unsigned long size)
+       bytes = __ffz_word_loop(addr, size);
+       bits = __ffz_word(bytes*8, __load_ulong_be(addr, bytes));
+       return (bits < size) ? bits : size;
+}
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+static inline unsigned long find_first_bit(const unsigned long * addr,
+                                          unsigned long size)
 {
-       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-        unsigned long res, cmp, count;
+       unsigned long bytes, bits;
 
         if (!size)
                 return 0;
-       asm volatile(
-               "       slgr    %1,%1\n"
-               "       lgr     %2,%3\n"
-               "       slgr    %0,%0\n"
-               "       aghi    %2,63\n"
-               "       srlg    %2,%2,6\n"
-               "0:     cg      %1,0(%0,%4)\n"
-               "       jne     1f\n"
-               "       aghi    %0,8\n"
-               "       brct    %2,0b\n"
-               "       lgr     %0,%3\n"
-               "       j       5f\n"
-               "1:     lg      %2,0(%0,%4)\n"
-               "       sllg    %0,%0,3\n"
-               "       clr     %2,%1\n"
-               "       jne     2f\n"
-               "       aghi    %0,32\n"
-               "       srlg    %2,%2,32\n"
-               "2:     lghi    %1,0xff\n"
-               "       tmll    %2,0xffff\n"
-               "       jnz     3f\n"
-               "       aghi    %0,16\n"
-               "       srl     %2,16\n"
-               "3:     tmll    %2,0x00ff\n"
-               "       jnz     4f\n"
-               "       aghi    %0,8\n"
-               "       srl     %2,8\n"
-               "4:     ngr     %2,%1\n"
-               "       ic      %2,0(%2,%5)\n"
-               "       algr    %0,%2\n"
-               "5:"
-               : "=&a" (res), "=&d" (cmp), "=&a" (count)
-               : "a" (size), "a" (addr), "a" (&_sb_findmap),
-                 "m" (*(addrtype *) addr) : "cc");
-        return (res < size) ? res : size;
+       bytes = __ffs_word_loop(addr, size);
+       bits = __ffs_word(bytes*8, __load_ulong_be(addr, bytes));
+       return (bits < size) ? bits : size;
 }
 
-#endif /* __s390x__ */
-
-static inline int
-find_next_zero_bit (const unsigned long * addr, unsigned long size,
-                   unsigned long offset)
+/**
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static inline int find_next_zero_bit (const unsigned long * addr,
+                                     unsigned long size,
+                                     unsigned long offset)
 {
         const unsigned long *p;
        unsigned long bit, set;
@@ -688,10 +702,10 @@ find_next_zero_bit (const unsigned long * addr, unsigned long size,
        p = addr + offset / __BITOPS_WORDSIZE;
        if (bit) {
                /*
-                * s390 version of ffz returns __BITOPS_WORDSIZE
+                * __ffz_word returns __BITOPS_WORDSIZE
                 * if no zero bit is present in the word.
                 */
-               set = ffz(*p >> bit) + bit;
+               set = __ffz_word(0, *p >> bit) + bit;
                if (set >= size)
                        return size + offset;
                if (set < __BITOPS_WORDSIZE)
@@ -703,9 +717,15 @@ find_next_zero_bit (const unsigned long * addr, unsigned long size,
        return offset + find_first_zero_bit(p, size);
 }
 
-static inline int
-find_next_bit (const unsigned long * addr, unsigned long size,
-              unsigned long offset)
+/**
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static inline int find_next_bit (const unsigned long * addr,
+                                unsigned long size,
+                                unsigned long offset)
 {
         const unsigned long *p;
        unsigned long bit, set;
@@ -718,10 +738,10 @@ find_next_bit (const unsigned long * addr, unsigned long size,
        p = addr + offset / __BITOPS_WORDSIZE;
        if (bit) {
                /*
-                * s390 version of __ffs returns __BITOPS_WORDSIZE
+                * __ffs_word returns __BITOPS_WORDSIZE
                 * if no one bit is present in the word.
                 */
-               set = __ffs(*p & (~0UL << bit));
+               set = __ffs_word(0, *p & (~0UL << bit));
                if (set >= size)
                        return size + offset;
                if (set < __BITOPS_WORDSIZE)
@@ -744,8 +764,6 @@ static inline int sched_find_first_bit(unsigned long *b)
        return find_first_bit(b, 140);
 }
 
-#include <asm-generic/bitops/ffs.h>
-
 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/fls64.h>
 
@@ -775,105 +793,22 @@ static inline int sched_find_first_bit(unsigned long *b)
 #define ext2_find_next_bit(addr, size, off) \
        generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
 
-#ifndef __s390x__
-
-static inline int 
-ext2_find_first_zero_bit(void *vaddr, unsigned int size)
+static inline int ext2_find_first_zero_bit(void *vaddr, unsigned int size)
 {
-       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-       unsigned long cmp, count;
-        unsigned int res;
+       unsigned long bytes, bits;
 
         if (!size)
                 return 0;
-       asm volatile(
-               "       lhi     %1,-1\n"
-               "       lr      %2,%3\n"
-               "       ahi     %2,31\n"
-               "       srl     %2,5\n"
-               "       slr     %0,%0\n"
-               "0:     cl      %1,0(%0,%4)\n"
-               "       jne     1f\n"
-               "       ahi     %0,4\n"
-               "       brct    %2,0b\n"
-               "       lr      %0,%3\n"
-               "       j       4f\n"
-               "1:     l       %2,0(%0,%4)\n"
-               "       sll     %0,3\n"
-               "       ahi     %0,24\n"
-               "       lhi     %1,0xff\n"
-               "       tmh     %2,0xffff\n"
-               "       jo      2f\n"
-               "       ahi     %0,-16\n"
-               "       srl     %2,16\n"
-               "2:     tml     %2,0xff00\n"
-               "       jo      3f\n"
-               "       ahi     %0,-8\n"
-               "       srl     %2,8\n"
-               "3:     nr      %2,%1\n"
-               "       ic      %2,0(%2,%5)\n"
-               "       alr     %0,%2\n"
-               "4:"
-               : "=&a" (res), "=&d" (cmp), "=&a" (count)
-               : "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-                 "m" (*(addrtype *) vaddr) : "cc");
-        return (res < size) ? res : size;
+       bytes = __ffz_word_loop(vaddr, size);
+       bits = __ffz_word(bytes*8, __load_ulong_le(vaddr, bytes));
+       return (bits < size) ? bits : size;
 }
 
-#else /* __s390x__ */
-
-static inline unsigned long
-ext2_find_first_zero_bit(void *vaddr, unsigned long size)
-{
-       typedef struct { long _[__BITOPS_WORDS(size)]; } addrtype;
-        unsigned long res, cmp, count;
-
-        if (!size)
-                return 0;
-       asm volatile(
-               "       lghi    %1,-1\n"
-               "       lgr     %2,%3\n"
-               "       aghi    %2,63\n"
-               "       srlg    %2,%2,6\n"
-               "       slgr    %0,%0\n"
-               "0:     clg     %1,0(%0,%4)\n"
-               "       jne     1f\n"
-               "       aghi    %0,8\n"
-               "       brct    %2,0b\n"
-               "       lgr     %0,%3\n"
-               "       j       5f\n"
-               "1:     cl      %1,0(%0,%4)\n"
-               "       jne     2f\n"
-               "       aghi    %0,4\n"
-               "2:     l       %2,0(%0,%4)\n"
-               "       sllg    %0,%0,3\n"
-               "       aghi    %0,24\n"
-               "       lghi    %1,0xff\n"
-               "       tmlh    %2,0xffff\n"
-               "       jo      3f\n"
-               "       aghi    %0,-16\n"
-               "       srl     %2,16\n"
-               "3:     tmll    %2,0xff00\n"
-               "       jo      4f\n"
-               "       aghi    %0,-8\n"
-               "       srl     %2,8\n"
-               "4:     ngr     %2,%1\n"
-               "       ic      %2,0(%2,%5)\n"
-               "       algr    %0,%2\n"
-               "5:"
-               : "=&a" (res), "=&d" (cmp), "=&a" (count)
-               : "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-                 "m" (*(addrtype *) vaddr) : "cc");
-        return (res < size) ? res : size;
-}
-
-#endif /* __s390x__ */
-
-static inline int
-ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
+static inline int ext2_find_next_zero_bit(void *vaddr, unsigned long size,
+                                         unsigned long offset)
 {
         unsigned long *addr = vaddr, *p;
-       unsigned long word, bit, set;
+       unsigned long bit, set;
 
         if (offset >= size)
                 return size;
@@ -882,23 +817,11 @@ ext2_find_next_zero_bit(void *vaddr, unsigned long size, unsigned long offset)
        size -= offset;
        p = addr + offset / __BITOPS_WORDSIZE;
         if (bit) {
-#ifndef __s390x__
-               asm volatile(
-                       "       ic      %0,0(%1)\n"
-                       "       icm     %0,2,1(%1)\n"
-                       "       icm     %0,4,2(%1)\n"
-                       "       icm     %0,8,3(%1)"
-                       : "=&a" (word) : "a" (p), "m" (*p) : "cc");
-#else
-               asm volatile(
-                       "       lrvg    %0,%1"
-                       : "=a" (word) : "m" (*p) );
-#endif
                /*
                 * s390 version of ffz returns __BITOPS_WORDSIZE
                 * if no zero bit is present in the word.
                 */
-               set = ffz(word >> bit) + bit;
+               set = ffz(__load_ulong_le(p, 0) >> bit) + bit;
                if (set >= size)
                        return size + offset;
                if (set < __BITOPS_WORDSIZE)