bl  cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _image_copy_end_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        add     r2, r2, #8              /* each rel.dyn entry is 8 bytes */
        cmp     r2, r3
        blo     fixloop
-       b       clear_bss
+       bx      lr
+
+#endif
+
+relocate_done:
+
+       bx      lr
+
+#ifndef CONFIG_SPL_BUILD
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
        .word __rel_dyn_end - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
-#endif
 
-clear_bss:
-#ifdef CONFIG_SPL_BUILD
-       /* No relocation for SPL */
-       ldr     r0, =__bss_start
-       ldr     r1, =__bss_end__
-#else
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
 #endif
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     r0, _nand_boot_ofs
-       mov     pc, r0
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
 
-_nand_boot_ofs:
-       .word nand_boot
-#else
-jump_2_ram:
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
+       bx      lr
 
 /*
  *************************************************************************
 
         */
        bl      lowlevel_init           /* go setup pll,mux,memory */
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
 skip_hw_init:
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-#ifndef CONFIG_NAND_SPL
-       bl coloured_LED_init
-       bl red_led_on
-#endif
-#endif
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     pc, _nand_boot
-
-_nand_boot: .word nand_boot
-#else
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
+       bx      lr
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
        .word mmu_table
 #endif
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 #ifndef CONFIG_NAND_SPL
 /*
  * we assume that cache operation is done before. (eg. cleanup_before_linux())
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-
-       bl coloured_LED_init
-       bl red_led_on
-#endif
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
        mov     pc, lr
 
-_board_init_r_ofs:
-       .word board_init_r - _start
-
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _rel_dyn_end_ofs:
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 /*
  *************************************************************************
  *
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-
-       bl coloured_LED_init
-       bl red_led_on
-#endif
-
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     r0, _nand_boot_ofs
-       mov     pc, r0
+relocate_done:
 
-_nand_boot_ofs:
-       .word nand_boot
-#else
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
        mov     pc, lr
 
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
-
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _rel_dyn_end_ofs:
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 /*
  *************************************************************************
  *
 
        bl  cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-
-       bl coloured_LED_init
-       bl red_led_on
-#endif
-
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     r0, _nand_boot_ofs
-       mov     pc, r0
+relocate_done:
 
-_nand_boot_ofs:
-       .word nand_boot
-#else
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
        mov     pc, lr
 
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
-
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _rel_dyn_end_ofs:
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 /*
  *************************************************************************
  *
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-#ifdef CONFIG_NAND_SPL /* deprecated, use instead CONFIG_SPL_BUILD */
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-#else
-#ifdef CONFIG_SPL_BUILD
-       ldr     sp, =(CONFIG_SPL_STACK)
-#else
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-#endif
-#endif
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        sub     r9, r6, r0              /* r9 <- relocation offset */
        cmp     r0, r6
-       moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       moveq   r9, #0                  /* no relocation. offset(r9) = 0 */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifdef CONFIG_SPL_BUILD
-       /* No relocation for SPL */
-       ldr     r0, =__bss_start
-       ldr     r1, =__bss_end__
-#else
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-#endif
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
+relocate_done:
 
-#ifndef CONFIG_SPL_BUILD
-       bl coloured_LED_init
-       bl red_led_on
-#endif
-
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     r0, _nand_boot_ofs
-       mov     pc, r0
-
-_nand_boot_ofs:
-       .word nand_boot
-#else
-       ldr     r0, _board_init_r_ofs
-       ldr     r1, _TEXT_BASE
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
+       bx      lr
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
        .word __rel_dyn_end - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
+
 #endif
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       bx      lr
+
 /*
  *************************************************************************
  *
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-#endif
-
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     pc, _nand_boot
+relocate_done:
 
-_nand_boot: .word nand_boot
-#else
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
        mov     pc, lr
 
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
-
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _rel_dyn_end_ofs:
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 /*
  *************************************************************************
  *
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-
-       bl coloured_LED_init
-       bl red_led_on
-#endif
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_NAND_SPL
-       ldr     r0, _nand_boot_ofs
-       mov     pc, r0
-
-_nand_boot_ofs:
-       .word nand_boot
-#else
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
+       bx      lr
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 /*
  *************************************************************************
  *
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _image_copy_end_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        add     r2, r2, #8              /* each rel.dyn entry is 8 bytes */
        cmp     r2, r3
        blo     fixloop
-       b       clear_bss
+
+relocate_done:
+
+       bx      lr
+
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _rel_dyn_end_ofs:
        .word __rel_dyn_end - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
+ENDPROC(relocate_code)
 
-clear_bss:
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
+#endif
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-jump_2_ram:
+ENTRY(c_runtime_cpu_setup)
 /*
  * If I-cache is enabled invalidate it
  */
        mcr     p15, 0, r0, c12, c0, 0  @Set VBAR
 #endif /* !Tegra20 */
 
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
-ENDPROC(relocate_code)
-#endif
+       bx      lr
+
+ENDPROC(c_runtime_cpu_setup)
 
 /*************************************************************************
  *
 
        orr     r0,r0,#0x13
        msr     cpsr,r0
 
-/* Set initial stackpointer in SDRAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-
-       bl coloured_LED_init
-       bl red_led_on
-#endif
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
+       bx      lr
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       bx      lr
+
 /****************************************************************************/
 /*                                                                         */
 /* Interrupt handling                                                      */
 
        bl      lock_cache_for_stack
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0, =0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 #ifndef CONFIG_SPL_BUILD
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
 /* Disable the Dcache RAM lock for stack now */
 #ifdef CONFIG_CPU_PXA25X
        bl      cpu_init_crit
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-#endif /* #ifndef CONFIG_SPL_BUILD */
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-#ifdef CONFIG_ONENAND_SPL
-       ldr     r0, _onenand_boot_ofs
-       mov     pc, r0
-
-_onenand_boot_ofs:
-       .word onenand_boot
-#else
-jump_2_ram:
-       ldr     r0, _board_init_r_ofs
-       ldr     r1, _TEXT_BASE
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
-#endif
+       bx      lr
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
        .word __rel_dyn_end - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
+
 #endif
+
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       bx      lr
+
 /*
  *************************************************************************
  *
 
        bl      lowlevel_init
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-
-       bl coloured_LED_init
-       bl red_led_on
-#endif
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
-       mov     pc, lr
-
-_board_init_r_ofs:
-       .word board_init_r - _start
+       bx      lr
 
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       bx      lr
+
 /*
  *************************************************************************
  *
 
        bl      cpu_init_crit
 #endif
 
-/* Set stackpointer in internal RAM to call board_init_f */
-call_board_init_f:
-       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
-       bic     sp, sp, #7 /* 8-byte alignment for ABI compliance */
-       ldr     r0,=0x00000000
-       bl      board_init_f
+       bl      _main
 
 /*------------------------------------------------------------------------------*/
 
        mov     r5, r1  /* save addr of gd */
        mov     r6, r2  /* save addr of destination */
 
-       /* Set up the stack                                                 */
-stack_setup:
-       mov     sp, r4
-
        adr     r0, _start
        cmp     r0, r6
        moveq   r9, #0          /* no relocation. relocation offset(r9) = 0 */
-       beq     clear_bss               /* skip relocation */
+       beq     relocate_done           /* skip relocation */
        mov     r1, r6                  /* r1 <- scratch for copy_loop */
        ldr     r3, _bss_start_ofs
        add     r2, r0, r3              /* r2 <- source end address         */
        blo     fixloop
 #endif
 
-clear_bss:
-#ifndef CONFIG_SPL_BUILD
-       ldr     r0, _bss_start_ofs
-       ldr     r1, _bss_end_ofs
-       mov     r4, r6                  /* reloc addr */
-       add     r0, r0, r4
-       add     r1, r1, r4
-       mov     r2, #0x00000000         /* clear                            */
-
-clbss_l:cmp    r0, r1                  /* clear loop... */
-       bhs     clbss_e                 /* if reached end of bss, exit */
-       str     r2, [r0]
-       add     r0, r0, #4
-       b       clbss_l
-clbss_e:
-#endif
+relocate_done:
 
-/*
- * We are done. Do not return, instead branch to second part of board
- * initialization, now running from RAM.
- */
-       ldr     r0, _board_init_r_ofs
-       adr     r1, _start
-       add     lr, r0, r1
-       add     lr, lr, r9
-       /* setup parameters for board_init_r */
-       mov     r0, r5          /* gd_t */
-       mov     r1, r6          /* dest_addr */
-       /* jump to it ... */
        mov     pc, lr
 
-_board_init_r_ofs:
-       .word board_init_r - _start
-
 _rel_dyn_start_ofs:
        .word __rel_dyn_start - _start
 _rel_dyn_end_ofs:
 _dynsym_start_ofs:
        .word __dynsym_start - _start
 
+       .globl  c_runtime_cpu_setup
+c_runtime_cpu_setup:
+
+       mov     pc, lr
+
 /*
  *************************************************************************
  *
 
 
 GLCOBJS        += div0.o
 
+SOBJS-y += crt0.o
+
 ifndef CONFIG_SPL_BUILD
 COBJS-y        += board.o
 COBJS-y        += bootm.o
 
 
        bootstage_mark_name(BOOTSTAGE_ID_START_UBOOT_F, "board_init_f");
 
-       /* Pointer is writable since we allocated a register for it */
-       gd = (gd_t *) ((CONFIG_SYS_INIT_SP_ADDR) & ~0x07);
-       /* compiler optimization barrier needed for GCC >= 3.4 */
-       __asm__ __volatile__("": : :"memory");
-
        memset((void *)gd, 0, sizeof(gd_t));
 
        gd->mon_len = _bss_end_ofs;
                gd->fdt_blob = new_fdt;
        }
        memcpy(id, (void *)gd, sizeof(gd_t));
-
-       relocate_code(addr_sp, id, addr);
-
-       /* NOTREACHED - relocate_code() does not return */
 }
 
 #if !defined(CONFIG_SYS_NO_FLASH)
        ulong flash_size;
 #endif
 
-       gd = id;
-
        gd->flags |= GD_FLG_RELOC;      /* tell others: relocation done */
        bootstage_mark_name(BOOTSTAGE_ID_START_UBOOT_R, "board_init_r");
 
 
--- /dev/null
+/*
+ *  crt0 - C-runtime startup Code for ARM U-Boot
+ *
+ *  Copyright (c) 2012  Albert ARIBAUD <albert.u.boot@aribaud.net>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <asm-offsets.h>
+
+/*
+ * This file handles the target-independent stages of the U-Boot
+ * start-up where a C runtime environment is needed. Its entry point
+ * is _main and is branched into from the target's start.S file.
+ *
+ * _main execution sequence is:
+ *
+ * 1. Set up initial environment for calling board_init_f().
+ *    This environment only provides a stack and a place to store
+ *    the GD ('global data') structure, both located in some readily
+ *    available RAM (SRAM, locked cache...). In this context, VARIABLE
+ *    global data, initialized or not (BSS), are UNAVAILABLE; only
+ *    CONSTANT initialized data are available.
+ *
+ * 2. Call board_init_f(). This function prepares the hardware for
+ *    execution from system RAM (DRAM, DDR...) As system RAM may not
+ *    be available yet, , board_init_f() must use the current GD to
+ *    store any data which must be passed on to later stages. These
+ *    data include the relocation destination, the future stack, and
+ *    the future GD location.
+ *
+ * (the following applies only to non-SPL builds)
+ *
+ * 3. Set up intermediate environment where the stack and GD are the
+ *    ones allocated by board_init_f() in system RAM, but BSS and
+ *    initialized non-const data are still not available.
+ *
+ * 4. Call relocate_code(). This function relocates U-Boot from its
+ *    current location into the relocation destination computed by
+ *    board_init_f().
+ *
+ * 5. Set up final environment for calling board_init_r(). This
+ *    environment has BSS (initialized to 0), initialized non-const
+ *    data (initialized to their intended value), and stack in system
+ *    RAM. GD has retained values set by board_init_f(). Some CPUs
+ *    have some work left to do at this point regarding memory, so
+ *    call c_runtime_cpu_setup.
+ *
+ * 6. Branch to either nand_boot() or board_init_r().
+ */
+
+/*
+ * declare nand_boot() or board_init_r() to jump to at end of crt0
+ */
+
+#if defined(CONFIG_NAND_SPL)
+
+.globl nand_boot
+
+#elif ! defined(CONFIG_SPL_BUILD)
+
+.globl board_init_r
+
+#endif
+
+/*
+ * start and end of BSS
+ */
+
+.globl __bss_start
+.globl __bss_end__
+
+/*
+ * entry point of crt0 sequence
+ */
+
+.global _main
+
+_main:
+
+/*
+ * Set up initial C runtime environment and call board_init_f(0).
+ */
+
+#if defined(CONFIG_NAND_SPL)
+       /* deprecated, use instead CONFIG_SPL_BUILD */
+       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
+#elif defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
+       ldr     sp, =(CONFIG_SPL_STACK)
+#else
+       ldr     sp, =(CONFIG_SYS_INIT_SP_ADDR)
+#endif
+       bic     sp, sp, #7      /* 8-byte alignment for ABI compliance */
+       sub     sp, #GD_SIZE    /* allocate one GD above SP */
+       bic     sp, sp, #7      /* 8-byte alignment for ABI compliance */
+       mov     r8, sp          /* GD is above SP */
+       mov     r0, #0
+       bl      board_init_f
+
+#if ! defined(CONFIG_SPL_BUILD)
+
+/*
+ * Set up intermediate environment (new sp and gd) and call
+ * relocate_code(addr_sp, gd, addr_moni). Trick here is that
+ * we'll return 'here' but relocated.
+ */
+
+       ldr     sp, [r8, #GD_START_ADDR_SP]     /* r8 = gd->start_addr_sp */
+       bic     sp, sp, #7      /* 8-byte alignment for ABI compliance */
+       ldr     r8, [r8, #GD_BD]                /* r8 = gd->bd */
+       sub     r8, r8, #GD_SIZE                /* new GD is below bd */
+
+       adr     lr, here
+       ldr     r0, [r8, #GD_RELOC_OFF]         /* lr = gd->start_addr_sp */
+       add     lr, lr, r0
+       ldr     r0, [r8, #GD_START_ADDR_SP]     /* r0 = gd->start_addr_sp */
+       mov     r1, r8                          /* r1 = gd */
+       ldr     r2, [r8, #GD_RELOCADDR]         /* r2 = gd->relocaddr */
+       b       relocate_code
+here:
+
+/* Set up final (full) environment */
+
+       bl      c_runtime_cpu_setup     /* we still call old routine here */
+
+       ldr     r0, =__bss_start        /* this is auto-relocated! */
+       ldr     r1, =__bss_end__        /* this is auto-relocated! */
+
+       mov     r2, #0x00000000         /* prepare zero to clear BSS */
+
+clbss_l:cmp    r0, r1                  /* while not at end of BSS */
+       strlo   r2, [r0]                /* clear 32-bit BSS word */
+       addlo   r0, r0, #4              /* move to next */
+       blo     clbss_l
+
+       bl coloured_LED_init
+       bl red_led_on
+
+#if defined(CONFIG_NAND_SPL)
+
+       /* call _nand_boot() */
+       ldr     pc, =nand_boot
+
+#else
+
+       /* call board_init_r(gd_t *id, ulong dest_addr) */
+       mov     r0, r8                  /* gd_t */
+       ldr     r1, [r8, #GD_RELOCADDR] /* dest_addr */
+       /* call board_init_r */
+       ldr     pc, =board_init_r       /* this is auto-relocated! */
+
+#endif
+
+       /* we should not return here. */
+
+#endif
 
 extern char console_buffer[];
 
 /* arch/$(ARCH)/lib/board.c */
-void   board_init_f  (ulong) __attribute__ ((noreturn));
+void   board_init_f(ulong);
 void   board_init_r  (gd_t *, ulong) __attribute__ ((noreturn));
 int    checkboard    (void);
 int    checkflash    (void);
 
 #define CONFIG_SYS_BOOTMAPSZ           ((256*1024*1024) - (4*1024))
 
 #define CONFIG_SPL_RAM_DEVICE
-#define CONFIG_SPL_STACK (&__stack_start)
+#define CONFIG_SPL_STACK CONFIG_SYS_INIT_SP_ADDR
 #define CONFIG_SYS_SPL_MALLOC_START ((unsigned long) (&__malloc_start))
 #define CONFIG_SYS_SPL_MALLOC_SIZE (&__malloc_end - &__malloc_start)
 
 
        DEFINE(GENERATED_BD_INFO_SIZE,
                (sizeof(struct bd_info) + 15) & ~15);
 
+       DEFINE(GD_SIZE, sizeof(struct global_data));
+
+       DEFINE(GD_BD, offsetof(struct global_data, bd));
+
+#if defined(CONFIG_ARM)
+
+       DEFINE(GD_RELOCADDR, offsetof(struct global_data, relocaddr));
+
+       DEFINE(GD_RELOC_OFF, offsetof(struct global_data, reloc_off));
+
+       DEFINE(GD_START_ADDR_SP, offsetof(struct global_data, start_addr_sp));
+
+#endif
+
        return 0;
 }
 
 AFLAGS += -DCONFIG_SPL_BUILD -DCONFIG_NAND_SPL
 CFLAGS += -DCONFIG_SPL_BUILD -DCONFIG_NAND_SPL
 
-SOBJS  = start.o lowlevel_init.o
+SOBJS  = start.o crt0.o lowlevel_init.o
 COBJS  = nand_boot_fsl_nfc.o
 
 SRCS   := $(SRCTREE)/nand_spl/nand_boot_fsl_nfc.c
 SRCS   += $(SRCTREE)/arch/arm/cpu/arm1136/start.S
+SRCS   += $(SRCTREE)/arch/arm/lib/crt0.S
 SRCS   += $(SRCTREE)/board/freescale/mx31pdk/lowlevel_init.S
 OBJS   := $(addprefix $(obj),$(SOBJS) $(COBJS))
 __OBJS := $(SOBJS) $(COBJS)
 $(obj)%.o:     $(SRCTREE)/arch/arm/cpu/arm1136/%.S
        $(CC) $(AFLAGS) -c -o $@ $<
 
+$(obj)%.o:     $(SRCTREE)/arch/arm/lib/%.S
+       $(CC) $(AFLAGS) -c -o $@ $<
+
 $(obj)%.o:     $(SRCTREE)/board/freescale/mx31pdk/%.S
        $(CC) $(AFLAGS) -c -o $@ $<
 
 
 AFLAGS += -DCONFIG_SPL_BUILD -DCONFIG_NAND_SPL
 CFLAGS += -DCONFIG_SPL_BUILD -DCONFIG_NAND_SPL
 
-SOBJS  = start.o lowlevel_init.o
+SOBJS  = start.o crt0.o lowlevel_init.o
 COBJS  = nand_boot_fsl_nfc.o
 
 SRCS   := $(SRCTREE)/nand_spl/nand_boot_fsl_nfc.c
 SRCS   += $(SRCTREE)/arch/arm/cpu/arm926ejs/start.S
+SRCS   += $(SRCTREE)/arch/arm/lib/crt0.S
 SRCS   += $(SRCTREE)/board/karo/tx25/lowlevel_init.S
 OBJS   := $(addprefix $(obj),$(SOBJS) $(COBJS))
 __OBJS := $(SOBJS) $(COBJS)
 $(obj)%.o:     $(SRCTREE)/arch/arm/cpu/arm926ejs/%.S
        $(CC) $(AFLAGS) -c -o $@ $<
 
+$(obj)%.o:     $(SRCTREE)/arch/arm/lib/%.S
+       $(CC) $(AFLAGS) -c -o $@ $<
+
 $(obj)%.o:     $(SRCTREE)/board/karo/tx25/%.S
        $(CC) $(AFLAGS) -c -o $@ $<