I call this code base PuppyBeagle, as it is more of a miniature version of an OS kernel in terms of what it prepares the board with. Here are some of the files I wrote to boot up the board, set up the Interrupt Vector Table (IVT), initialize RAM, initialize the serial port for early debugging, and initialize and blink the LED connected to the TWL4030 power-management IC (PMIC), for which I need to program the I2C controller so that the OMAP can communicate with the PMIC.
I have also initialized the MMC module, though reading/writing is still not 100% reliable; I paused work on this for some time and continued development on the Linux side instead. Hopefully, once I move to my new place, I can take this up again!
/******************************************************************************
*             Entry Point to PuppyBeagle Boot code
*
* This part of the code does some basic IVT setup until an application code
* overrides this table with a well defined IVT.
*
*       Copyrights (c) 2010 Amarnath B Revanna, amarnath.revanna@gmail.com
******************************************************************************/
  .text
  .code 32

  /*
   * set_mode_stack mode, stack_top
   *
   * Switch the CPU to processor mode \mode and load that mode's banked
   * stack pointer with the address \stack_top.
   *
   * Only the mode bits selected by MODE_MASK are modified; every other
   * CPSR bit is written back as it was read.
   * Clobbers: r0, and sp of the mode being entered.
   */
  .macro set_mode_stack mode, stack_top
  mrs r0, cpsr
  bic r0, r0, #MODE_MASK
  orr r0, r0, #\mode
  msr cpsr_c, r0
  ldr sp, =\stack_top
  .endm

  /*
   * _start - boot entry point (ARM state).
   *
   * Sequence: debugger parking loop -> provisional vector stub ->
   * per-mode stack setup -> bss clear -> low_level_init() -> main().
   *
   * Clobbers r0-r2, r12, lr and the banked sp of every mode it visits.
   * The code assumes it is entered in SVC mode.
   */
  .global _start
  .func _start
_start:
  /*
   * Debug hook: the very first instruction spins forever, so nothing below
   * runs until a debugger moves pc past this loop.
   */
wait4debugger:
  b wait4debugger
  b reset

  /*
   * Provisional exception vector entries.
   *
   * NOTE(review): in ARM state pc reads as the instruction address + 8, so
   * each entry loads its target from the word 0x1C bytes after itself. In
   * this layout those words are the first seven instructions at `reset`,
   * not a table of handler addresses, so taking any of these entries would
   * branch to an instruction encoding. A literal table of handler
   * addresses (see the disabled list below) still has to be added.
   */
  ldr pc, [pc, #0x14]
  ldr pc, [pc, #0x14]
  ldr pc, [pc, #0x14]
  ldr pc, [pc, #0x14]
  ldr pc, [pc, #0x14]
  ldr pc, [pc, #0x14]
  ldr pc, [pc, #0x14]
/*
  ldr pc, =swi
  ldr pc, =pabort
  ldr pc, =dabort
rsvd:  b rsvd
  ldr pc, =irq
  ldr pc, =fiq
*/

reset:
  /*
   * We will branch to a C routine for low level initialization. No
   * initialized variables, bss or stack are available at this point, so
   * the minimum C environment is prepared here first. The C routine must
   * reside within the 1k memory area so that it can be branched to and
   * returned from.
   *
   * Every processor mode gets its own stack, because an exception may be
   * taken at any time once the secondary IVT is in place.
   */

  /* Already in SVC mode: only the stack pointer needs loading. */
  ldr sp, =__svc_stack__start

  set_mode_stack MODE_IRQ, __irq_stack__start   /* IRQ mode */
  set_mode_stack MODE_FIQ, __fiq_stack__start   /* FIQ mode */
  set_mode_stack MODE_ABT, __abt_stack__start   /* Abort mode */
  set_mode_stack MODE_UND, __und_stack__start   /* Undefined mode */
  set_mode_stack MODE_SYS, __sys_stack__start   /* SYS mode (sp shared with USR) */

  /* Return to SVC mode; its stack pointer was loaded above. */
  mrs r0, cpsr
  bic r0, r0, #MODE_MASK
  orr r0, r0, #MODE_SVC
  msr cpsr_c, r0

  /*
   * Prepare the C runtime: zero [__bss_start, __bss_end) one word at a
   * time, as the C standard requires for uninitialized objects.
   *
   * The exit test is an unsigned ">=" rather than "==", so the loop also
   * terminates when the region size is not a multiple of 4 or the end
   * symbol lies below the start symbol. In the unaligned case the final
   * word store covers up to 3 bytes past __bss_end.
   */
  ldr r0, =__bss_start      /* r0 = write cursor */
  ldr r1, =__bss_end        /* r1 = end of region (exclusive) */
  mov r2, #0                /* r2 = fill value, loop invariant */
bss_clear:
  cmp r0, r1
  bhs clear_done
  str r2, [r0], #4          /* store zero, then advance cursor by 4 */
  b bss_clear

  /* All basic initialization done, C code can run from here. */
clear_done:
  bl low_level_init

  /* Call main through a register; lr = address of the swi below. */
  ldr r12, =main
  mov lr, pc
  bx r12

  /* main is never expected to return; if it does, raise an exception. */
  swi 0xFFFFFF

  /*
   * Park here if the SWI handler returns, so execution cannot run on into
   * whatever follows this function.
   */
start_hang:
  b start_hang
  .size _start, . - _start
  .endfunc
/* CPSR mode field: mask covering the five low-order mode bits. */
.equ MODE_MASK, 0x1F

/* CPSR mode-field encodings, listed in ascending numeric order. */
.equ MODE_USR, 0x10
.equ MODE_FIQ, 0x11
.equ MODE_IRQ, 0x12
.equ MODE_SVC, 0x13
.equ MODE_ABT, 0x17
.equ MODE_UND, 0x1B
.equ MODE_SYS, 0x1F

/*
 * Address range spanning seven 4-byte vector entries.
 * NOTE(review): presumably the on-chip RAM exception vector area of the
 * SoC - confirm against the TRM. Neither symbol is used in the code
 * visible in this file.
 */
.equ IVT_VECTOR_ADDR, 0x4020FFC8
.equ IVT_END_ADDR, IVT_VECTOR_ADDR + (4 * 7)
  .end
