Blackfin arch: SMP supporting patchset: BF561 related code
author: Graf Yang <graf.yang@analog.com>
Wed, 7 Jan 2009 15:14:39 +0000 (23:14 +0800)
committer: Bryan Wu <cooloney@kernel.org>
Wed, 7 Jan 2009 15:14:39 +0000 (23:14 +0800)
Blackfin dual core BF561 processor can support SMP like features.
https://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:smp-like

This patch adds the SMP extensions to the BF561 kernel code.

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
arch/blackfin/mach-bf561/Kconfig
arch/blackfin/mach-bf561/Makefile
arch/blackfin/mach-bf561/atomic.S [new file with mode: 0644]
arch/blackfin/mach-bf561/include/mach/blackfin.h
arch/blackfin/mach-bf561/include/mach/defBF561.h
arch/blackfin/mach-bf561/include/mach/mem_map.h
arch/blackfin/mach-bf561/include/mach/smp.h [new file with mode: 0644]
arch/blackfin/mach-bf561/secondary.S [new file with mode: 0644]
arch/blackfin/mach-bf561/smp.c [new file with mode: 0644]

index 3f48954..5d56438 100644 (file)
@@ -4,9 +4,9 @@ source "arch/blackfin/mach-bf561/boards/Kconfig"
 
 menu "BF561 Specific Configuration"
 
-comment "Core B Support"
+if (!SMP)
 
-menu "Core B Support"
+comment "Core B Support"
 
 config BF561_COREB
        bool "Enable Core B support"
@@ -25,7 +25,7 @@ config BF561_COREB_RESET
          0 is set, and will reset PC to 0xff600000 when
          COREB_SRAM_INIT is cleared.
 
-endmenu
+endif
 
 comment "Interrupt Priority Assignment"
 
index f39235a..c37f00c 100644 (file)
@@ -7,3 +7,4 @@ extra-y := head.o
 obj-y := ints-priority.o dma.o
 
 obj-$(CONFIG_BF561_COREB) += coreb.o
+obj-$(CONFIG_SMP)  += smp.o secondary.o atomic.o
diff --git a/arch/blackfin/mach-bf561/atomic.S b/arch/blackfin/mach-bf561/atomic.S
new file mode 100644 (file)
index 0000000..9439bc6
--- /dev/null
@@ -0,0 +1,919 @@
+/*
+ * File:         arch/blackfin/mach-bf561/atomic.S
+ * Author:       Philippe Gerum <rpm@xenomai.org>
+ *
+ *               Copyright 2007 Analog Devices Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/linkage.h>
+#include <asm/blackfin.h>
+#include <asm/cache.h>
+#include <asm/asm-offsets.h>
+#include <asm/rwlock.h>
+#include <asm/cplb.h>
+
+.text
+
+/*
+ * Load the 32bit address of _corelock into \reg, one 16bit half at a
+ * time (low half first, then high half).
+ */
+.macro coreslot_loadaddr reg:req
+       \reg\().l = _corelock;
+       \reg\().h = _corelock;
+.endm
+
+/*
+ * Acquire the global inter-core lock (_corelock) with interrupts
+ * masked, then flush and invalidate the cache line holding the
+ * atomic data.
+ *
+ * r0 = address of atomic data to flush and invalidate (32bit).
+ *
+ * Clear interrupts and return the old mask (in r0).
+ * We assume that no atomic data can span cachelines, so only the
+ * single line containing the address passed in r0 is handled.
+ *
+ * Clobbers: r2:0, p0
+ */
+ENTRY(_get_core_lock)
+       /*
+        * r1 = r0 with the low address bits masked off, i.e. the start
+        * of the cache line (presumes L1_CACHE_BYTES is a power of two).
+        */
+       r1 = -L1_CACHE_BYTES;
+       r1 = r0 & r1;
+       cli r0;
+       coreslot_loadaddr p0;
+.Lretry_corelock:
+       /* CC is set by testset when the lock was free and is now ours. */
+       testset (p0);
+       if cc jump .Ldone_corelock;
+       SSYNC(r2);
+       jump .Lretry_corelock
+.Ldone_corelock:
+       /* Lock held: flush+invalidate the line computed in r1 above. */
+       p0 = r1;
+       CSYNC(r2);
+       flushinv[p0];
+       SSYNC(r2);
+       rts;
+ENDPROC(_get_core_lock)
+
+/*
+ * Acquire the global inter-core lock (_corelock) with interrupts
+ * masked, without performing any cache maintenance.
+ *
+ * r0 = address of atomic data in uncacheable memory region (32bit).
+ *      The incoming value is not read here; r0 is overwritten by cli.
+ *
+ * Clear interrupts and return the old mask (in r0).
+ *
+ * Clobbers: r0, p0 (r2 is also handed to SSYNC() as its scratch
+ * register while spinning)
+ */
+ENTRY(_get_core_lock_noflush)
+       cli r0;
+       coreslot_loadaddr p0;
+.Lretry_corelock_noflush:
+       /* CC is set by testset when the lock was free and is now ours. */
+       testset (p0);
+       if cc jump .Ldone_corelock_noflush;
+       SSYNC(r2);
+       jump .Lretry_corelock_noflush
+.Ldone_corelock_noflush:
+       rts;
+ENDPROC(_get_core_lock_noflush)
+
+/*
+ * Release the global inter-core lock (_corelock) and restore the
+ * interrupt mask.
+ *
+ * r0 = interrupt mask to restore.
+ * r1 = address of atomic data to flush and invalidate (32bit).
+ *      NOTE: as written, the value passed in r1 is never read; r1 is
+ *      overwritten with 0 below because no flush is performed here.
+ *
+ * Interrupts are masked on entry (see _get_core_lock).
+ * Clobbers: r2:0, p0
+ */
+ENTRY(_put_core_lock)
+       /* Write-through cache assumed, so no flush needed here. */
+       coreslot_loadaddr p0;
+       /* Storing 0 to _corelock marks it free again. */
+       r1 = 0;
+       [p0] = r1;
+       SSYNC(r2);
+       sti r0;
+       rts;
+ENDPROC(_put_core_lock)
+
+#ifdef __ARCH_SYNC_CORE_DCACHE
+
+/*
+ * Under the core lock, set the bit of every other core in
+ * _barrier_mask while leaving the current core's own bit unchanged.
+ *
+ * Takes no arguments. rets, r7:5, r0, p1 and p0 are saved on the
+ * stack and restored before returning; r1 and r2 are not saved.
+ */
+ENTRY(___raw_smp_mark_barrier_asm)
+       [--sp] = rets;
+       [--sp] = ( r7:5 );
+       [--sp] = r0;
+       [--sp] = p1;
+       [--sp] = p0;
+       /* r0 now holds the saved IRQ mask until _put_core_lock below. */
+       call _get_core_lock_noflush;
+
+       /*
+        * Calculate current core mask
+        */
+       GET_CPUID(p1, r7);
+       r6 = 1;
+       r6 <<= r7;
+
+       /*
+        * Set bit of other cores in barrier mask. Don't change current core bit.
+        */
+       p1.l = _barrier_mask;
+       p1.h = _barrier_mask;
+       r7 = [p1];
+       /* r5 = current core's bit as currently stored in the mask. */
+       r5 = r7 & r6;
+       /* New mask: every bit set except the current core's ... */
+       r7 = ~r6;
+       cc = r5 == 0;
+       if cc jump 1f;
+       /* ... unless it was already set, in which case keep it set. */
+       r7 = r7 | r6;
+1:
+       [p1] = r7;
+       SSYNC(r2);
+
+       call _put_core_lock;
+       p0 = [sp++];
+       p1 = [sp++];
+       r0 = [sp++];
+       ( r7:5 ) = [sp++];
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_smp_mark_barrier_asm)
+
+/*
+ * Under the core lock, test the current core's bit in _barrier_mask.
+ * If it is set, clear it, release the core lock and call
+ * _resync_core_dcache; otherwise just release the core lock.
+ *
+ * Takes no arguments. rets, r7:5, r0, p1 and p0 are saved on the
+ * stack and restored before returning; r1 and r2 are not saved.
+ */
+ENTRY(___raw_smp_check_barrier_asm)
+       [--sp] = rets;
+       [--sp] = ( r7:5 );
+       [--sp] = r0;
+       [--sp] = p1;
+       [--sp] = p0;
+       /* r0 now holds the saved IRQ mask until _put_core_lock below. */
+       call _get_core_lock_noflush;
+
+       /*
+        * Calculate current core mask
+        */
+       GET_CPUID(p1, r7);
+       r6 = 1;
+       r6 <<= r7;
+
+       /*
+        * Clear current core bit in barrier mask if it is set.
+        */
+       p1.l = _barrier_mask;
+       p1.h = _barrier_mask;
+       r7 = [p1];
+       r5 = r7 & r6;
+       cc = r5 == 0;
+       /* Bit not set: nothing to resync, only drop the lock (label 1). */
+       if cc jump 1f;
+       r6 = ~r6;
+       r7 = r7 & r6;
+       [p1] = r7;
+       SSYNC(r2);
+
+       call _put_core_lock;
+
+       /*
+        * Invalidate the entire D-cache of current core.
+        */
+       /* 12 bytes of stack are reserved around the call. */
+       sp += -12;
+       call _resync_core_dcache
+       sp += 12;
+       jump 2f;
+1:
+       call _put_core_lock;
+2:
+       p0 = [sp++];
+       p1 = [sp++];
+       r0 = [sp++];
+       ( r7:5 ) = [sp++];
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_smp_check_barrier_asm)
+
+/*
+ * Finish a lock acquisition: record whether another CPU left its
+ * fingerprint in the lock word, clear the fingerprints, release the
+ * core lock, resync the D-cache if needed, and restore interrupts.
+ *
+ * Called with the core lock held (it is released below by storing 0
+ * to _corelock) and IRQs masked.
+ *
+ * r0 = irqflags
+ * r1 = address of atomic data
+ *
+ * rets and r7:6 are saved on the stack and restored before returning.
+ *
+ * Clobbers: r2:0, p1:0
+ */
+_start_lock_coherent:
+
+       [--sp] = rets;
+       [--sp] = ( r7:6 );
+       /* Keep irqflags in r7 and the lock address in p1 across calls. */
+       r7 = r0;
+       p1 = r1;
+
+       /*
+        * Determine whether the atomic data was previously
+        * owned by another CPU (=r6).
+        */
+       GET_CPUID(p0, r2);
+       r1 = 1;
+       r1 <<= r2;
+       /* r2 = mask selecting every CPU bit except our own. */
+       r2 = ~r1;
+
+       r1 = [p1];
+       r1 >>= 28;   /* CPU fingerprints are stored in the high nibble. */
+       r6 = r1 & r2;
+       /* Write the lock word back with the high nibble cleared. */
+       r1 = [p1];
+       r1 <<= 4;
+       r1 >>= 4;
+       [p1] = r1;
+
+       /*
+        * Release the core lock now, but keep IRQs disabled while we are
+        * performing the remaining housekeeping chores for the current CPU.
+        */
+       coreslot_loadaddr p0;
+       r1 = 0;
+       [p0] = r1;
+
+       /*
+        * If another CPU has owned the same atomic section before us,
+        * then our D-cached copy of the shared data protected by the
+        * current spin/write_lock may be obsolete.
+        */
+       cc = r6 == 0;
+       if cc jump .Lcache_synced
+
+       /*
+        * Invalidate the entire D-cache of the current core.
+        */
+       sp += -12;
+       call _resync_core_dcache
+       sp += 12;
+
+.Lcache_synced:
+       SSYNC(r2);
+       /* Restore the interrupt mask saved from r0 on entry. */
+       sti r7;
+       ( r7:6 ) = [sp++];
+       rets = [sp++];
+       rts
+
+/*
+ * Finish a lock release: stamp the current CPU's fingerprint into the
+ * lock word (bit 28 + CPU id), then tail-jump to _put_core_lock,
+ * which releases the core lock, restores IRQs and returns to our
+ * caller.
+ *
+ * r0 = irqflags (passed through untouched to _put_core_lock)
+ * r1 = address of atomic data
+ *
+ * Clobbers: r2:0, p1:0
+ */
+_end_lock_coherent:
+
+       p1 = r1;
+       GET_CPUID(p0, r2);
+       /* r1 = 1 << (28 + cpuid): our bit within the high nibble. */
+       r2 += 28;
+       r1 = 1;
+       r1 <<= r2;
+       r2 = [p1];
+       r2 = r1 | r2;
+       [p1] = r2;
+       r1 = p1;
+       jump _put_core_lock;
+
+#endif /* __ARCH_SYNC_CORE_DCACHE */
+
+/*
+ * Report whether a spinlock is currently held.
+ *
+ * r0 = &spinlock->lock
+ *
+ * Returns in r0 the value of bit 0 of the lock word (1 or 0), sampled
+ * while holding the core lock.
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_spin_is_locked_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+       r3 = [p1];
+       /* Bit 0 is the lock bit; copy it into r3 via CC. */
+       cc = bittst( r3, 0 );
+       r3 = cc;
+       r1 = p1;
+       call _put_core_lock;
+       rets = [sp++];
+       r0 = r3;
+       rts;
+ENDPROC(___raw_spin_is_locked_asm)
+
+/*
+ * Acquire a spinlock, spinning until bit 0 of the lock word is found
+ * clear while the core lock is held.
+ *
+ * r0 = &spinlock->lock
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_spin_lock_asm)
+       p1 = r0;
+       [--sp] = rets;
+.Lretry_spinlock:
+       call _get_core_lock;
+       r1 = p1;
+       r2 = [p1];
+       cc = bittst( r2, 0 );
+       if cc jump .Lbusy_spinlock
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       /*
+        * NOTE(review): r3 is not read again in this routine nor in
+        * _start_lock_coherent as written; this copy looks unnecessary.
+        */
+       r3 = p1;
+       bitset ( r2, 0 ); /* Raise the lock bit. */
+       [p1] = r2;
+       call _start_lock_coherent
+#else
+       r2 = 1;
+       [p1] = r2;
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       rts;
+
+.Lbusy_spinlock:
+       /* We don't touch the atomic area if busy, so that flush
+          will behave like nop in _put_core_lock. */
+       call _put_core_lock;
+       SSYNC(r2);
+       /* _get_core_lock expects the lock address in r0 again. */
+       r0 = p1;
+       jump .Lretry_spinlock
+ENDPROC(___raw_spin_lock_asm)
+
+/*
+ * Try once to acquire a spinlock, without spinning.
+ *
+ * r0 = &spinlock->lock
+ *
+ * Returns r0 = 1 if the lock was taken, r0 = 0 if bit 0 of the lock
+ * word was already set.
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_spin_trylock_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+       r1 = p1;
+       r3 = [p1];
+       cc = bittst( r3, 0 );
+       if cc jump .Lfailed_trylock
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       bitset ( r3, 0 ); /* Raise the lock bit. */
+       [p1] = r3;
+       call _start_lock_coherent
+#else
+       r2 = 1;
+       [p1] = r2;
+       call _put_core_lock;
+#endif
+       r0 = 1;
+       rets = [sp++];
+       rts;
+.Lfailed_trylock:
+       /* Lock word left untouched; just drop the core lock. */
+       call _put_core_lock;
+       r0 = 0;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_spin_trylock_asm)
+
+/*
+ * Release a spinlock by clearing bit 0 of the lock word under the
+ * core lock.
+ *
+ * r0 = &spinlock->lock
+ *
+ * Clobbers: r2:0, p1:0
+ */
+ENTRY(___raw_spin_unlock_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+       r2 = [p1];
+       bitclr ( r2, 0 );
+       [p1] = r2;
+       r1 = p1;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       /* Stamps our CPU fingerprint, then releases the core lock. */
+       call _end_lock_coherent
+#else
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_spin_unlock_asm)
+
+/*
+ * Acquire a rwlock for reading. The lock word is decremented under
+ * the core lock; if the result is negative the decrement is undone
+ * and the routine polls the word until it can retry.
+ *
+ * r0 = &rwlock->lock
+ *
+ * Clobbers: r2:0, p1:0
+ */
+ENTRY(___raw_read_lock_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+.Lrdlock_try:
+       r1 = [p1];
+       r1 += -1;
+       [p1] = r1;
+       cc = r1 < 0;
+       if cc jump .Lrdlock_failed
+       r1 = p1;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       call _start_lock_coherent
+#else
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       rts;
+
+.Lrdlock_failed:
+       /* Undo the decrement stored above. */
+       r1 += 1;
+       [p1] = r1;
+.Lrdlock_wait:
+       /* Drop and retake the core lock between polls of the lock word. */
+       r1 = p1;
+       call _put_core_lock;
+       SSYNC(r2);
+       r0 = p1;
+       call _get_core_lock;
+       r1 = [p1];
+       /* Keep waiting while the word is below 2, then retry. */
+       cc = r1 < 2;
+       if cc jump .Lrdlock_wait;
+       jump .Lrdlock_try
+ENDPROC(___raw_read_lock_asm)
+
+/*
+ * r0 = &rwlock->lock
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_read_trylock_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+       r1 = [p1];
+       cc = r1 <= 0;
+       if cc jump .Lfailed_tryrdlock;
+       r1 += -1;
+       [p1] = r1;
+       r1 = p1;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       call _start_lock_coherent
+#else
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       r0 = 1;
+       rts;
+.Lfailed_tryrdlock:
+       r1 = p1;
+       call _put_core_lock;
+       rets = [sp++];
+       r0 = 0;
+       rts;
+ENDPROC(___raw_read_trylock_asm)
+
+/*
+ * r0 = &rwlock->lock
+ *
+ * Note: Processing controlled by a reader lock should not have
+ * any side-effect on cache issues with the other core, so we
+ * just release the core lock and exit (no _end_lock_coherent).
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_read_unlock_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+       r1 = [p1];
+       r1 += 1;
+       [p1] = r1;
+       r1 = p1;
+       call _put_core_lock;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_read_unlock_asm)
+
+/*
+ * r0 = &rwlock->lock
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_write_lock_asm)
+       p1 = r0;
+       r3.l = lo(RW_LOCK_BIAS);
+       r3.h = hi(RW_LOCK_BIAS);
+       [--sp] = rets;
+       call _get_core_lock;
+.Lwrlock_try:
+       r1 = [p1];
+       r1 = r1 - r3;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       r2 = r1;
+       r2 <<= 4;
+       r2 >>= 4;
+       cc = r2 == 0;
+#else
+       cc = r1 == 0;
+#endif
+       if !cc jump .Lwrlock_wait
+       [p1] = r1;
+       r1 = p1;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       call _start_lock_coherent
+#else
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       rts;
+
+.Lwrlock_wait:
+       r1 = p1;
+       call _put_core_lock;
+       SSYNC(r2);
+       r0 = p1;
+       call _get_core_lock;
+       r1 = [p1];
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       r1 <<= 4;
+       r1 >>= 4;
+#endif
+       cc = r1 == r3;
+       if !cc jump .Lwrlock_wait;
+       jump .Lwrlock_try
+ENDPROC(___raw_write_lock_asm)
+
+/*
+ * Try once to acquire a rwlock for writing, without spinning.
+ *
+ * r0 = &rwlock->lock
+ *
+ * Returns r0 = 1 if the lock word was equal to RW_LOCK_BIAS and has
+ * been claimed, r0 = 0 otherwise.
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_write_trylock_asm)
+       p1 = r0;
+       [--sp] = rets;
+       call _get_core_lock;
+       r1 = [p1];
+       r2.l = lo(RW_LOCK_BIAS);
+       r2.h = hi(RW_LOCK_BIAS);
+       /*
+        * NOTE(review): the whole 32bit word is compared here, whereas
+        * ___raw_write_lock_asm strips the high nibble (CPU fingerprints)
+        * before testing under __ARCH_SYNC_CORE_DCACHE. Confirm that a
+        * word carrying fingerprint bits is meant to fail this check.
+        */
+       cc = r1 == r2;
+       if !cc jump .Lfailed_trywrlock;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       /* Keep only the high nibble; the low 28 bits become zero. */
+       r1 >>= 28;
+       r1 <<= 28;
+#else
+       r1 = 0;
+#endif
+       [p1] = r1;
+       r1 = p1;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       call _start_lock_coherent
+#else
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       r0 = 1;
+       rts;
+
+.Lfailed_trywrlock:
+       /* Lock word left untouched; just drop the core lock. */
+       r1 = p1;
+       call _put_core_lock;
+       rets = [sp++];
+       r0 = 0;
+       rts;
+ENDPROC(___raw_write_trylock_asm)
+
+/*
+ * r0 = &rwlock->lock
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_write_unlock_asm)
+       p1 = r0;
+       r3.l = lo(RW_LOCK_BIAS);
+       r3.h = hi(RW_LOCK_BIAS);
+       [--sp] = rets;
+       call _get_core_lock;
+       r1 = [p1];
+       r1 = r1 + r3;
+       [p1] = r1;
+       r1 = p1;
+#ifdef __ARCH_SYNC_CORE_DCACHE
+       call _end_lock_coherent
+#else
+       call _put_core_lock;
+#endif
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_write_unlock_asm)
+
+/*
+ * r0 = ptr
+ * r1 = value
+ *
+ * Add a signed value to a 32bit word and return the new value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_update_asm)
+       p1 = r0;
+       r3 = r1;
+       [--sp] = rets;
+       call _get_core_lock;
+       r2 = [p1];
+       r3 = r3 + r2;
+       [p1] = r3;
+       r1 = p1;
+       call _put_core_lock;
+       r0 = r3;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_atomic_update_asm)
+
+/*
+ * r0 = ptr
+ * r1 = mask
+ *
+ * Clear the mask bits from a 32bit word and return the old 32bit value
+ * atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_clear_asm)
+       p1 = r0;
+       r3 = ~r1;
+       [--sp] = rets;
+       call _get_core_lock;
+       r2 = [p1];
+       r3 = r2 & r3;
+       [p1] = r3;
+       r3 = r2;
+       r1 = p1;
+       call _put_core_lock;
+       r0 = r3;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_atomic_clear_asm)
+
+/*
+ * r0 = ptr
+ * r1 = mask
+ *
+ * Set the mask bits into a 32bit word and return the old 32bit value
+ * atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_set_asm)
+       p1 = r0;
+       r3 = r1;
+       [--sp] = rets;
+       call _get_core_lock;
+       r2 = [p1];
+       r3 = r2 | r3;
+       [p1] = r3;
+       r3 = r2;
+       r1 = p1;
+       call _put_core_lock;
+       r0 = r3;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_atomic_set_asm)
+
+/*
+ * r0 = ptr
+ * r1 = mask
+ *
+ * XOR the mask bits with a 32bit word and return the old 32bit value
+ * atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_xor_asm)
+       p1 = r0;
+       r3 = r1;
+       [--sp] = rets;
+       call _get_core_lock;
+       r2 = [p1];
+       r3 = r2 ^ r3;
+       [p1] = r3;
+       r3 = r2;
+       r1 = p1;
+       call _put_core_lock;
+       r0 = r3;
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_atomic_xor_asm)
+
+/*
+ * r0 = ptr
+ * r1 = mask
+ *
+ * Perform a logical AND between the mask bits and a 32bit word, and
+ * return the masked value. We need this on this architecture in
+ * order to invalidate the local cache before testing.
+ *
+ * Unlike the read-modify-write helpers in this file, this routine
+ * neither takes the core lock nor masks interrupts, and it does not
+ * touch the stack.
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_atomic_test_asm)
+       p1 = r0;
+       r3 = r1;
+       /* p0 = ptr with the low bits masked off by -L1_CACHE_BYTES. */
+       r1 = -L1_CACHE_BYTES;
+       r1 = r0 & r1;
+       p0 = r1;
+       flushinv[p0];
+       SSYNC(r2);
+       /* Read the word after the flush/invalidate and apply the mask. */
+       r0 = [p1];
+       r0 = r0 & r3;
+       rts;
+ENDPROC(___raw_atomic_test_asm)
+
+/*
+ * r0 = ptr
+ * r1 = value
+ *
+ * Swap *ptr with value and return the old 32bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+#define        __do_xchg(src, dst)             \
+       p1 = r0;                        \
+       r3 = r1;                        \
+       [--sp] = rets;                  \
+       call _get_core_lock;            \
+       r2 = src;                       \
+       dst = r3;                       \
+       r3 = r2;                        \
+       r1 = p1;                        \
+       call _put_core_lock;            \
+       r0 = r3;                        \
+       rets = [sp++];                  \
+       rts;
+
+ENTRY(___raw_xchg_1_asm)
+       __do_xchg(b[p1] (z), b[p1])
+ENDPROC(___raw_xchg_1_asm)
+
+ENTRY(___raw_xchg_2_asm)
+       __do_xchg(w[p1] (z), w[p1])
+ENDPROC(___raw_xchg_2_asm)
+
+ENTRY(___raw_xchg_4_asm)
+       __do_xchg([p1], [p1])
+ENDPROC(___raw_xchg_4_asm)
+
+/*
+ * r0 = ptr
+ * r1 = new
+ * r2 = old
+ *
+ * Swap *ptr with new if *ptr == old and return the previous *ptr
+ * value atomically.
+ *
+ * Clobbers: r3:0, p1:0
+ */
+#define        __do_cmpxchg(src, dst)          \
+       [--sp] = rets;                  \
+       [--sp] = r4;                    \
+       p1 = r0;                        \
+       r3 = r1;                        \
+       r4 = r2;                        \
+       call _get_core_lock;            \
+       r2 = src;                       \
+       cc = r2 == r4;                  \
+       if !cc jump 1f;                 \
+       dst = r3;                       \
+     1: r3 = r2;                       \
+       r1 = p1;                        \
+       call _put_core_lock;            \
+       r0 = r3;                        \
+       r4 = [sp++];                    \
+       rets = [sp++];                  \
+       rts;
+
+ENTRY(___raw_cmpxchg_1_asm)
+       __do_cmpxchg(b[p1] (z), b[p1])
+ENDPROC(___raw_cmpxchg_1_asm)
+
+ENTRY(___raw_cmpxchg_2_asm)
+       __do_cmpxchg(w[p1] (z), w[p1])
+ENDPROC(___raw_cmpxchg_2_asm)
+
+ENTRY(___raw_cmpxchg_4_asm)
+       __do_cmpxchg([p1], [p1])
+ENDPROC(___raw_cmpxchg_4_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Set a bit in a 32bit word and return the old 32bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_set_asm)
+       r2 = r1;
+       r1 = 1;
+       r1 <<= r2;
+       jump ___raw_atomic_set_asm
+ENDPROC(___raw_bit_set_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Clear a bit in a 32bit word and return the old 32bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_clear_asm)
+       r2 = r1;
+       r1 = 1;
+       r1 <<= r2;
+       jump ___raw_atomic_clear_asm
+ENDPROC(___raw_bit_clear_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Toggle a bit in a 32bit word and return the old 32bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_toggle_asm)
+       r2 = r1;
+       r1 = 1;
+       r1 <<= r2;
+       jump ___raw_atomic_xor_asm
+ENDPROC(___raw_bit_toggle_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Test-and-set a bit in a 32bit word and return the old bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_test_set_asm)
+       [--sp] = rets;
+       [--sp] = r1;
+       call ___raw_bit_set_asm
+       r1 = [sp++];
+       r2 = 1;
+       r2 <<= r1;
+       r0 = r0 & r2;
+       cc = r0 == 0;
+       if cc jump 1f
+       r0 = 1;
+1:
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_bit_test_set_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Test-and-clear a bit in a 32bit word and return the old bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_test_clear_asm)
+       [--sp] = rets;
+       [--sp] = r1;
+       call ___raw_bit_clear_asm
+       r1 = [sp++];
+       r2 = 1;
+       r2 <<= r1;
+       r0 = r0 & r2;
+       cc = r0 == 0;
+       if cc jump 1f
+       r0 = 1;
+1:
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_bit_test_clear_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Test-and-toggle a bit in a 32bit word,
+ * and return the old bit value atomically.
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_test_toggle_asm)
+       [--sp] = rets;
+       [--sp] = r1;
+       call ___raw_bit_toggle_asm
+       r1 = [sp++];
+       r2 = 1;
+       r2 <<= r1;
+       r0 = r0 & r2;
+       cc = r0 == 0;
+       if cc jump 1f
+       r0 = 1;
+1:
+       rets = [sp++];
+       rts;
+ENDPROC(___raw_bit_test_toggle_asm)
+
+/*
+ * r0 = ptr
+ * r1 = bitnr
+ *
+ * Test a bit in a 32bit word and return its value.
+ * We need this on this architecture in order to invalidate
+ * the local cache before testing.
+ *
+ * Clobbers: r3:0, p1:0
+ */
+ENTRY(___raw_bit_test_asm)
+       r2 = r1;
+       r1 = 1;
+       r1 <<= r2;
+       jump ___raw_atomic_test_asm
+ENDPROC(___raw_bit_test_asm)
+
+/*
+ * r0 = ptr
+ *
+ * Fetch and return an uncached 32bit value.
+ *
+ * Clobbers: r2:0, p1:0
+ */
+ENTRY(___raw_uncached_fetch_asm)
+       p1 = r0;
+       r1 = -L1_CACHE_BYTES;
+       r1 = r0 & r1;
+       p0 = r1;
+       flushinv[p0];
+       SSYNC(r2);
+       r0 = [p1];
+       rts;
+ENDPROC(___raw_uncached_fetch_asm)
index 0ea8666..f79f662 100644 (file)
 
 #define bfin_read_SIC_IMASK(x)         bfin_read32(SICA_IMASK0 + (x << 2))
 #define bfin_write_SIC_IMASK(x, val)   bfin_write32((SICA_IMASK0 + (x << 2)), val)
+#define bfin_read_SICB_IMASK(x)                bfin_read32(SICB_IMASK0 + (x << 2))
+#define bfin_write_SICB_IMASK(x, val)  bfin_write32((SICB_IMASK0 + (x << 2)), val)
 #define bfin_read_SIC_ISR(x)           bfin_read32(SICA_ISR0 + (x << 2))
 #define bfin_write_SIC_ISR(x, val)     bfin_write32((SICA_ISR0 + (x << 2)), val)
+#define bfin_read_SICB_ISR(x)          bfin_read32(SICB_ISR0 + (x << 2))
+#define bfin_write_SICB_ISR(x, val)    bfin_write32((SICB_ISR0 + (x << 2)), val)
 
 #define BFIN_UART_NR_PORTS      1
 
index 4eca202..d7c5097 100644 (file)
 #define ACTIVE_PLLDISABLED     0x0004  /* Processor In Active Mode With PLL Disabled   */
 #define        PLL_LOCKED                      0x0020  /* PLL_LOCKCNT Has Been Reached                                 */
 
+/* SICA_SYSCR Masks */
+#define COREB_SRAM_INIT                0x0020
+
 /* SWRST Mask */
 #define SYSTEM_RESET           0x0007  /* Initiates a system software reset */
 #define DOUBLE_FAULT_A         0x0008  /* Core A Double Fault Causes Reset */
index f1d4c06..488c3bd 100644 (file)
 #define L1_SCRATCH_START       COREA_L1_SCRATCH_START
 #define L1_SCRATCH_LENGTH      0x1000
 
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
+
+/*
+ * Per-core L1 region lookups: a non-zero cpu selects the Core B
+ * address, zero selects the Core A address. Each macro evaluates to
+ * an unsigned long.
+ */
+#define get_l1_scratch_start_cpu(cpu)                          \
+       ((unsigned long)((cpu) ? COREB_L1_SCRATCH_START         \
+                              : COREA_L1_SCRATCH_START))
+
+#define get_l1_code_start_cpu(cpu)                             \
+       ((unsigned long)((cpu) ? COREB_L1_CODE_START            \
+                              : COREA_L1_CODE_START))
+
+#define get_l1_data_a_start_cpu(cpu)                           \
+       ((unsigned long)((cpu) ? COREB_L1_DATA_A_START          \
+                              : COREA_L1_DATA_A_START))
+
+#define get_l1_data_b_start_cpu(cpu)                           \
+       ((unsigned long)((cpu) ? COREB_L1_DATA_B_START          \
+                              : COREA_L1_DATA_B_START))
+
+/* Same lookups for the core the caller is currently running on. */
+#define get_l1_scratch_start() get_l1_scratch_start_cpu(blackfin_core_id())
+#define get_l1_code_start()    get_l1_code_start_cpu(blackfin_core_id())
+#define get_l1_data_a_start()  get_l1_data_a_start_cpu(blackfin_core_id())
+#define get_l1_data_b_start()  get_l1_data_b_start_cpu(blackfin_core_id())
+
+#else /* !CONFIG_SMP */
+#define get_l1_scratch_start_cpu(cpu)  L1_SCRATCH_START
+#define get_l1_code_start_cpu(cpu)     L1_CODE_START
+#define get_l1_data_a_start_cpu(cpu)   L1_DATA_A_START
+#define get_l1_data_b_start_cpu(cpu)   L1_DATA_B_START
+#define get_l1_scratch_start()         L1_SCRATCH_START
+#define get_l1_code_start()            L1_CODE_START
+#define get_l1_data_a_start()          L1_DATA_A_START
+#define get_l1_data_b_start()          L1_DATA_B_START
+#endif /* !CONFIG_SMP */
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * The following macros both return the address of the PDA for the
+ * current core.
+ *
+ * In its first safe (and hairy) form, the macro neither clobbers any
+ * register aside of the output Preg, nor uses the stack, since it
+ * could be called with an invalid stack pointer, or the current stack
+ * space being uncovered by any CPLB (e.g. early exception handling).
+ *
+ * How the safe form works: DSPID is read into preg and shifted left
+ * by 2 twelve times (24 bits in total), so that only the low byte of
+ * DSPID remains, in the top byte of preg. If the result is zero, preg
+ * is set to the address of _cpu_pda; otherwise preg is loaded from the
+ * word stored at _cpu_pda. The comparison needs CC, so the code
+ * branches on the incoming CC value first and each path puts that
+ * value back into CC before finishing (see the "restore cc" lines).
+ *
+ * The constraints on the second form are a bit relaxed, and the code
+ * is allowed to use the specified Dreg for determining the PDA
+ * address to be returned into Preg.
+ */
+#ifdef CONFIG_SMP
+#define GET_PDA_SAFE(preg)             \
+       preg.l = lo(DSPID);             \
+       preg.h = hi(DSPID);             \
+       preg = [preg];                  \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       preg = preg << 2;               \
+       if cc jump 2f;                  \
+       cc = preg == 0x0;               \
+       preg.l = _cpu_pda;              \
+       preg.h = _cpu_pda;              \
+       if !cc jump 3f;                 \
+1:                                     \
+       /* preg = 0x0; */               \
+       cc = !cc; /* restore cc to 0 */ \
+       jump 4f;                        \
+2:                                     \
+       cc = preg == 0x0;               \
+       preg.l = _cpu_pda;              \
+       preg.h = _cpu_pda;              \
+       if cc jump 4f;                  \
+       /* preg = 0x1000000; */         \
+       cc = !cc; /* restore cc to 1 */ \
+3:                                     \
+       preg = [preg];                  \
+4:
+
+/*
+ * GET_PDA(preg, dreg): relaxed form of GET_PDA_SAFE. Reads DSPID into
+ * dreg and tests bit 0: when it is clear, preg is left holding the
+ * address of _cpu_pda; when it is set, preg is loaded from the word
+ * stored at _cpu_pda. Overwrites dreg and CC.
+ */
+#define GET_PDA(preg, dreg)            \
+       preg.l = lo(DSPID);             \
+       preg.h = hi(DSPID);             \
+       dreg = [preg];                  \
+       preg.l = _cpu_pda;              \
+       preg.h = _cpu_pda;              \
+       cc = bittst(dreg, 0);           \
+       if !cc jump 1f;                 \
+       preg = [preg];                  \
+1:                                     \
+
+/*
+ * GET_CPUID(preg, dreg): leaves bit 0 of DSPID (0 or 1) in dreg. The
+ * bit is moved into CC by rotating dreg right by one and is then
+ * copied from CC back into dreg. Overwrites preg and CC.
+ */
+#define GET_CPUID(preg, dreg)          \
+       preg.l = lo(DSPID);             \
+       preg.h = hi(DSPID);             \
+       dreg = [preg];                  \
+       dreg = ROT dreg BY -1;          \
+       dreg = CC;
+
+#else
+#define GET_PDA_SAFE(preg)             \
+       preg.l = _cpu_pda;              \
+       preg.h = _cpu_pda;
+
+#define GET_PDA(preg, dreg)    GET_PDA_SAFE(preg)
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
+
 #endif                         /* _MEM_MAP_533_H_ */
diff --git a/arch/blackfin/mach-bf561/include/mach/smp.h b/arch/blackfin/mach-bf561/include/mach/smp.h
new file mode 100644 (file)
index 0000000..f9e65eb
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef _MACH_BF561_SMP
+#define _MACH_BF561_SMP
+
+struct task_struct;
+
+void platform_init_cpus(void);
+
+void platform_prepare_cpus(unsigned int max_cpus);
+
+int platform_boot_secondary(unsigned int cpu, struct task_struct *idle);
+
+void platform_secondary_init(unsigned int cpu);
+
+void platform_request_ipi(int (*handler)(int, void *));
+
+void platform_send_ipi(cpumask_t callmap);
+
+void platform_send_ipi_cpu(unsigned int cpu);
+
+void platform_clear_ipi(unsigned int cpu);
+
+#endif /* !_MACH_BF561_SMP */
diff --git a/arch/blackfin/mach-bf561/secondary.S b/arch/blackfin/mach-bf561/secondary.S
new file mode 100644 (file)
index 0000000..35280f0
--- /dev/null
@@ -0,0 +1,215 @@
+/*
+ * File:         arch/blackfin/mach-bf561/secondary.S
+ * Based on:     arch/blackfin/mach-bf561/head.S
+ * Author:       Philippe Gerum <rpm@xenomai.org>
+ *
+ *               Copyright 2007 Analog Devices Inc.
+ *
+ * Description:  BF561 coreB bootstrap file
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/blackfin.h>
+#include <asm/asm-offsets.h>
+
+__INIT
+
+/* Lay the initial stack into the L1 scratch area of Core B */
+#define INITIAL_STACK  (COREB_L1_SCRATCH_START + L1_SCRATCH_LENGTH - 12)
+
+ENTRY(_coreb_trampoline_start)
+       /* Set the SYSCFG register */
+       R0 = 0x36;
+       SYSCFG = R0; /*Enable Cycle Counter and Nesting Of Interrupts(3rd Bit)*/
+       R0 = 0;
+
+       /*Clear Out All the data and pointer  Registers*/
+       R1 = R0;
+       R2 = R0;
+       R3 = R0;
+       R4 = R0;
+       R5 = R0;
+       R6 = R0;
+       R7 = R0;
+
+       P0 = R0;
+       P1 = R0;
+       P2 = R0;
+       P3 = R0;
+       P4 = R0;
+       P5 = R0;
+
+       LC0 = r0;
+       LC1 = r0;
+       L0 = r0;
+       L1 = r0;
+       L2 = r0;
+       L3 = r0;
+
+       /* Clear Out All the DAG Registers*/
+       B0 = r0;
+       B1 = r0;
+       B2 = r0;
+       B3 = r0;
+
+       I0 = r0;
+       I1 = r0;
+       I2 = r0;
+       I3 = r0;
+
+       M0 = r0;
+       M1 = r0;
+       M2 = r0;
+       M3 = r0;
+
+       /* Turn off the icache */
+       p0.l = LO(IMEM_CONTROL);
+       p0.h = HI(IMEM_CONTROL);
+       R1 = [p0];
+       R0 = ~ENICPLB;
+       R0 = R0 & R1;
+
+       /* Anomaly 05000125 */
+#ifdef ANOMALY_05000125
+       CLI R2;
+       SSYNC;
+#endif
+       [p0] = R0;
+       SSYNC;
+#ifdef ANOMALY_05000125
+       STI R2;
+#endif
+
+       /* Turn off the dcache */
+       p0.l = LO(DMEM_CONTROL);
+       p0.h = HI(DMEM_CONTROL);
+       R1 = [p0];
+       R0 = ~ENDCPLB;
+       R0 = R0 & R1;
+
+       /* Anomaly 05000125 */
+#ifdef ANOMALY_05000125
+       CLI R2;
+       SSYNC;
+#endif
+       [p0] = R0;
+       SSYNC;
+#ifdef ANOMALY_05000125
+       STI R2;
+#endif
+
+       /* in case of double faults, save a few things */
+       p0.l = _init_retx_coreb;
+       p0.h = _init_retx_coreb;
+       R0 = RETX;
+       [P0] = R0;
+
+#ifdef CONFIG_DEBUG_DOUBLEFAULT
+       /* Only save these if we are storing them,
+        * This happens here, since L1 gets clobbered
+        * below
+        */
+       GET_PDA(p0, r0);
+       r7 = [p0 + PDA_RETX];
+       p1.l = _init_saved_retx_coreb;
+       p1.h = _init_saved_retx_coreb;
+       [p1] = r7;
+
+       r7 = [p0 + PDA_DCPLB];
+       p1.l = _init_saved_dcplb_fault_addr_coreb;
+       p1.h = _init_saved_dcplb_fault_addr_coreb;
+       [p1] = r7;
+
+       r7 = [p0 + PDA_ICPLB];
+       p1.l = _init_saved_icplb_fault_addr_coreb;
+       p1.h = _init_saved_icplb_fault_addr_coreb;
+       [p1] = r7;
+
+       r7 = [p0 + PDA_SEQSTAT];
+       p1.l = _init_saved_seqstat_coreb;
+       p1.h = _init_saved_seqstat_coreb;
+       [p1] = r7;
+#endif
+
+       /* Initialize stack pointer */
+       sp.l = lo(INITIAL_STACK);
+       sp.h = hi(INITIAL_STACK);
+       fp = sp;
+       usp = sp;
+
+       /* This section keeps the processor in supervisor mode
+        * during core B startup.  Branches to the idle task.
+        */
+
+       /* EVT15 = _coreb_start */
+
+       p0.l = lo(EVT15);
+       p0.h = hi(EVT15);
+       p1.l = _coreb_start;
+       p1.h = _coreb_start;
+       [p0] = p1;
+       csync;
+
+       p0.l = lo(IMASK);
+       p0.h = hi(IMASK);
+       p1.l = IMASK_IVG15;
+       p1.h = 0x0;
+       [p0] = p1;
+       csync;
+
+       raise 15;
+       p0.l = .LWAIT_HERE;
+       p0.h = .LWAIT_HERE;
+       reti = p0;
+#if defined(ANOMALY_05000281)
+       nop; nop; nop;
+#endif
+       rti;
+
+.LWAIT_HERE:
+       jump .LWAIT_HERE;
+ENDPROC(_coreb_trampoline_start)
+ENTRY(_coreb_trampoline_end)
+
+/*
+ * Core B entry point, installed into EVT15 by the trampoline above
+ * and reached through "raise 15". Writes the watchdog control
+ * register, switches to the stack stored in _secondary_stack, calls
+ * _init_pda and then _secondary_start_kernel. Never returns: if
+ * _secondary_start_kernel comes back, the core spins at .L_exit.
+ */
+ENTRY(_coreb_start)
+       [--sp] = reti;
+
+       p0.l = lo(WDOGB_CTL);
+       p0.h = hi(WDOGB_CTL);
+       r0 = 0xAD6(z);
+       w[p0] = r0;     /* Clear the watchdog. */
+       ssync;
+
+       /*
+        * switch to IDLE stack.
+        */
+       /* _secondary_stack holds the stack pointer value to load. */
+       p0.l = _secondary_stack;
+       p0.h = _secondary_stack;
+       sp = [p0];
+       usp = sp;
+       fp = sp;
+       /* 12 bytes of stack are reserved around the _init_pda call. */
+       sp += -12;
+       call _init_pda
+       sp += 12;
+       call _secondary_start_kernel;
+.L_exit:
+       jump.s  .L_exit;
+ENDPROC(_coreb_start)
+
+__FINIT
diff --git a/arch/blackfin/mach-bf561/smp.c b/arch/blackfin/mach-bf561/smp.c
new file mode 100644 (file)
index 0000000..23fd4c1
--- /dev/null
@@ -0,0 +1,172 @@
+/*
+ * File:         arch/blackfin/mach-bf561/smp.c
+ * Author:       Philippe Gerum <rpm@xenomai.org>
+ *
+ *               Copyright 2007 Analog Devices Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.,
+ * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/smp.h>
+#include <asm/dma.h>
+
+#define COREB_SRAM_BASE  0xff600000 /* CoreB trampoline is copied here */
+#define COREB_SRAM_SIZE  0x4000 /* trampoline length is checked against this */
+
+extern char coreb_trampoline_start, coreb_trampoline_end; /* trampoline bounds */
+
+static DEFINE_SPINLOCK(boot_lock); /* held by the boot CPU while it waits for CoreB */
+
+static cpumask_t cpu_callin_map; /* set by a secondary once its local init is done */
+
+/*
+ * platform_init_cpus() - Register every core that may exist.
+ *
+ * CPU 0 (CoreA) and CPU 1 (CoreB) are both marked in cpu_possible_map.
+ * This runs while the architecture support is being set up
+ * (setup_arch()), so only minimal kernel services may be relied upon.
+ */
+
+void __init platform_init_cpus(void)
+{
+       unsigned int core;
+
+       /* core 0 is CoreA, core 1 is CoreB */
+       for (core = 0; core < 2; core++)
+               cpu_set(core, cpu_possible_map);
+}
+
+/*
+ * platform_prepare_cpus() - Stage the CoreB trampoline and mark both
+ * cores present.
+ *
+ * The code between coreb_trampoline_start and coreb_trampoline_end is
+ * copied by DMA to COREB_SRAM_BASE; it is a BUG if it does not fit in
+ * COREB_SRAM_SIZE bytes.  @max_cpus is not used here.
+ *
+ * NOTE(review): the "+ 1" makes the copy one byte longer than the
+ * distance between the two labels -- confirm this is intended.
+ */
+void __init platform_prepare_cpus(unsigned int max_cpus)
+{
+       void *sram = (void *)COREB_SRAM_BASE;
+       unsigned int core;
+       int nbytes;
+
+       nbytes = &coreb_trampoline_end - &coreb_trampoline_start + 1;
+       BUG_ON(nbytes > COREB_SRAM_SIZE);
+
+       dma_memcpy(sram, &coreb_trampoline_start, nbytes);
+
+       /* Both cores ought to be present on a bf561: 0 is CoreA, 1 is CoreB. */
+       for (core = 0; core < 2; core++)
+               cpu_set(core, cpu_present_map);
+
+       printk(KERN_INFO "CoreB bootstrap code to SRAM %p via DMA.\n", sram);
+}
+
+/*
+ * setup_profiling_timer() - Change the profiling timer multiplier.
+ *
+ * Not supported on this platform: @multiplier is ignored and -EINVAL is
+ * always returned.
+ *
+ * This must not be marked __init: the generic profiling code can call it
+ * long after boot (a write to /proc/profile on SMP kernels), by which time
+ * init sections have been discarded.
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+       return -EINVAL;
+}
+
+/*
+ * Mirror CoreA's system interrupt controller setup into CoreB's
+ * registers: first the peripheral interrupt masks, then the interrupt
+ * assignment registers, with an SSYNC() after each group.
+ */
+static void __cpuinit coreb_clone_sic_setup(void)
+{
+       /* Peripheral interrupt sources. */
+       bfin_write_SICB_IMASK0(bfin_read_SICA_IMASK0());
+       bfin_write_SICB_IMASK1(bfin_read_SICA_IMASK1());
+       SSYNC();
+
+       /* Interrupt assignment registers. */
+       bfin_write_SICB_IAR0(bfin_read_SICA_IAR0());
+       bfin_write_SICB_IAR1(bfin_read_SICA_IAR1());
+       bfin_write_SICB_IAR2(bfin_read_SICA_IAR2());
+       bfin_write_SICB_IAR3(bfin_read_SICA_IAR3());
+       bfin_write_SICB_IAR4(bfin_read_SICA_IAR4());
+       bfin_write_SICB_IAR5(bfin_read_SICA_IAR5());
+       bfin_write_SICB_IAR6(bfin_read_SICA_IAR6());
+       bfin_write_SICB_IAR7(bfin_read_SICA_IAR7());
+       SSYNC();
+}
+
+/*
+ * platform_secondary_init() - Per-core setup performed for @cpu.
+ *
+ * With local interrupts off, CoreA's interrupt controller setup is
+ * cloned to CoreB.  Interrupts are then enabled, the delay loop is
+ * calibrated and the cpu_data entry for @cpu is filled in.  Finally
+ * @cpu is flagged in cpu_callin_map, which is what the boot CPU polls
+ * for, and boot_lock is taken and dropped once so that this core does
+ * not proceed until the boot CPU has released the lock.
+ */
+void __cpuinit platform_secondary_init(unsigned int cpu)
+{
+       local_irq_disable();
+       coreb_clone_sic_setup();
+       local_irq_enable();
+
+       /* Loops-per-jiffy value for this core. */
+       calibrate_delay();
+
+       /* CPU-private information goes to the cpu_data array. */
+       bfin_setup_cpudata(cpu);
+
+       /* Local inits are complete: let the boot CPU know. */
+       cpu_set(cpu, cpu_callin_map);
+
+       /* Wait here until the boot CPU drops boot_lock. */
+       spin_lock(&boot_lock);
+       spin_unlock(&boot_lock);
+}
+
+/*
+ * platform_boot_secondary() - Release CoreB and wait for it to call in.
+ *
+ * It is a BUG if the COREB_SRAM_INIT bit of SICA_SYSCR is already clear
+ * (CoreB would be running already).  Otherwise the bit is cleared while
+ * boot_lock is held, and cpu_callin_map is polled every 100us for up to
+ * one second (1 * HZ jiffies).  @idle is not used here.
+ *
+ * Returns 0 when @cpu showed up in cpu_callin_map, -ENOSYS otherwise.
+ */
+int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+       unsigned long deadline;
+
+       /* CoreB already running?! */
+       BUG_ON((bfin_read_SICA_SYSCR() & COREB_SRAM_INIT) == 0);
+
+       printk(KERN_INFO "Booting Core B.\n");
+
+       spin_lock(&boot_lock);
+
+       /* Kick CoreB, which should start execution from COREB_SRAM_BASE. */
+       SSYNC();
+       bfin_write_SICA_SYSCR(bfin_read_SICA_SYSCR() & ~COREB_SRAM_INIT);
+       SSYNC();
+
+       /* Poll until CoreB calls in or the deadline passes. */
+       deadline = jiffies + 1 * HZ;
+       while (time_before(jiffies, deadline) &&
+              !cpu_isset(cpu, cpu_callin_map)) {
+               udelay(100);
+               barrier();
+       }
+
+       spin_unlock(&boot_lock);
+
+       if (!cpu_isset(cpu, cpu_callin_map))
+               return -ENOSYS;
+
+       return 0;
+}
+
+/*
+ * platform_request_ipi() - Install @handler as the IPI service routine.
+ *
+ * @handler is registered on IRQ_SUPPLE_0 with IRQF_DISABLED and is also
+ * passed as the dev_id cookie.  Failure to obtain the interrupt is fatal.
+ */
+void __init platform_request_ipi(irq_handler_t handler)
+{
+       if (request_irq(IRQ_SUPPLE_0, handler, IRQF_DISABLED,
+                       "SMP interrupt", handler))
+               panic("Cannot request supplemental interrupt 0 for IPI service\n");
+}
+
+/*
+ * platform_send_ipi() - Raise the IPI on every CPU set in @callmap.
+ *
+ * For each CPU, bit (6 + cpu) is OR-ed into SICB_SYSCR, with an SSYNC()
+ * before and after the register update.  CPU numbers of 2 and above
+ * are a BUG.
+ */
+void platform_send_ipi(cpumask_t callmap)
+{
+       unsigned int target;
+       unsigned int raise_bit;
+
+       for_each_cpu_mask(target, callmap) {
+               BUG_ON(target >= 2);
+               raise_bit = 1 << (6 + target);
+
+               SSYNC();
+               bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | raise_bit);
+               SSYNC();
+       }
+}
+
+/*
+ * platform_send_ipi_cpu() - Raise the IPI on a single CPU.
+ *
+ * Bit (6 + @cpu) is OR-ed into SICB_SYSCR between two SSYNC()s.
+ * A @cpu of 2 or above is a BUG.
+ */
+void platform_send_ipi_cpu(unsigned int cpu)
+{
+       unsigned int syscr;
+
+       BUG_ON(cpu >= 2);
+
+       SSYNC();
+       syscr = bfin_read_SICB_SYSCR();
+       syscr |= 1 << (6 + cpu);
+       bfin_write_SICB_SYSCR(syscr);
+       SSYNC();
+}
+
+/*
+ * platform_clear_ipi() - Clear the pending IPI for @cpu.
+ *
+ * Bit (10 + @cpu) is OR-ed into SICB_SYSCR between two SSYNC()s.
+ * A @cpu of 2 or above is a BUG.
+ *
+ * NOTE(review): presumably writing that bit is what acknowledges the
+ * supplemental interrupt raised through bit (6 + cpu) -- confirm against
+ * the hardware reference.
+ */
+void platform_clear_ipi(unsigned int cpu)
+{
+       unsigned int syscr;
+
+       BUG_ON(cpu >= 2);
+
+       SSYNC();
+       syscr = bfin_read_SICB_SYSCR();
+       syscr |= 1 << (10 + cpu);
+       bfin_write_SICB_SYSCR(syscr);
+       SSYNC();
+}