Tuesday, July 27, 2010

Final Kernel MMU/PageTable configuration

We now jump to __create_page_tables (in the same arch/arm/kernel/head.S file)

/*
 207 * Setup the initial page tables.  We only setup the barest
 208 * amount which are required to get the kernel running, which
 209 * generally means mapping in the kernel code.
 210 *
 211 * r8  = machinfo
 212 * r9  = cpuid
 213 * r10 = procinfo
 214 *
 215 * Returns:
 216 *  r0, r3, r6, r7 corrupted
 217 *  r4 = physical page table address
 218 */
R4 is loaded with the page table address: R4 = @0x80004000
The entire 16K page table is reset to zero.
R7 is loaded with the MMU flags entry from the proc_info structure at offset #8.
=> R7 = 0x0C0E
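As an aside, the flag value 0x0C0E can be decoded against the ARMv7 first-level section descriptor layout. A minimal C sketch (the decoder itself is mine; only the bit layout comes from the ARM ARM):

#include <stdio.h>

/* Decode a first-level ARM section descriptor. Bit layout per the
 * ARM ARM: [1:0] type, [2] B, [3] C, [4] XN, [8:5] domain, [11:10] AP. */
int main(void)
{
    unsigned int flags = 0x0C0E;                 /* value loaded into R7 */

    printf("type   = %u (2 => section)\n", flags & 0x3);
    printf("B      = %u\n", (flags >> 2) & 1);   /* bufferable */
    printf("C      = %u\n", (flags >> 3) & 1);   /* cacheable  */
    printf("XN     = %u\n", (flags >> 4) & 1);   /* execute never */
    printf("domain = %u\n", (flags >> 5) & 0xF);
    printf("AP     = %u (3 => full access)\n", (flags >> 10) & 0x3);
    return 0;
}

Running it shows 0x0C0E is a cacheable, bufferable section in domain 0 with full access permissions.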
 
         /*
 238         * Create identity mapping for first MB of kernel to
 239         * cater for the MMU enable.  This identity mapping
 240         * will be removed by paging_init().  We use our current program
 241         * counter to determine corresponding section base address.
 242         */
 
R6 is loaded with the current PC: R6 = @0x800080AC
 
The lower 20 bits of R6 are discarded (R6 = R6 >> 20) and the resulting section base is ORed with the flags in R7; the result is kept in R3:
R3 = @0x80000C0E
R3 is stored at [R4 + (R6 << 2)] = 0x80004000 + (0x800 << 2)
= 0x80004000 + 0x2000
= @0x80006000
R3 is also saved at 0x80007000, the entry covering KERNEL_START (0xC0000000).
R0 is then incremented by 4: R0 = 0x80007004
ldr     r6, =(KERNEL_END - 1) translates to
R6 = 0xC02E8A3B
R6 is then converted into the address of the last kernel page-table entry (R6 = R4 + (R6 >> 18)):
R6 = 0x8000700B
 
1:      cmp     r0, r6
 258        add     r3, r3, #1 << 20
 259        strls   r3, [r0], #4
 260        bls     1b
When the loop starts: R0 = 0x80007004, R3 = 0x80000C0E, R6 = 0x8000700B
R3 = R3 + 0x100000 = 0x80100C0E
R3 is then stored at 0x80007004, and R0 advances by another 4.
 
The loop stops after 3 sections of 1MB each are mapped.
Fig: Page table dump after the mapping loop
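To double-check the arithmetic above, here is a small C sketch (addresses hard-coded from this session) that reproduces the entries the assembly writes:

#include <stdio.h>

int main(void)
{
    unsigned int page_table = 0x80004000;    /* R4 */
    unsigned int flags      = 0x00000C0E;    /* R7 */
    unsigned int pc         = 0x800080AC;    /* current PC */
    unsigned int kernel_end = 0xC02E8A3B;    /* KERNEL_END - 1 */

    /* Identity mapping: index = PC >> 20, entry at table + index*4 */
    unsigned int idx   = pc >> 20;                       /* 0x800 */
    unsigned int entry = flags | (idx << 20);            /* 0x80000C0E */
    printf("identity entry @ 0x%08X = 0x%08X\n",
           page_table + (idx << 2), entry);              /* 0x80006000 */

    /* Kernel mapping: one 1MB section per iteration.
     * Note VA >> 18 == (VA >> 20) << 2, the byte offset of the entry. */
    unsigned int addr = page_table + ((0xC0000000u >> 20) << 2); /* 0x80007000 */
    unsigned int last = page_table + (kernel_end >> 18);         /* 0x8000700B */
    while (addr <= last) {
        printf("kernel entry   @ 0x%08X = 0x%08X\n", addr, entry);
        addr  += 4;
        entry += 1 << 20;    /* next 1MB section */
    }
    return 0;
}

It prints exactly three kernel entries, at 0x80007000, 0x80007004 and 0x80007008, matching the dump.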
 281        /*
 282         * Then map first 1MB of ram in case it contains our boot params.
 283         */
 284        add     r0, r4, #PAGE_OFFSET >> 18
 285        orr     r6, r7, #(PHYS_OFFSET & 0xff000000)
 286        .if     (PHYS_OFFSET & 0x00f00000)
 287        orr     r6, r6, #(PHYS_OFFSET & 0x00f00000)
 288        .endif
 289        str     r6, [r0]
Line #285 translates to
orr     r6, r7, #0x80000000 (PHYS_OFFSET; the debugger displays the constant as -0x80000000)
Next, line #289 is executed with r6 = 0x80000C0E, r0 = 0x80007000
 
In our case the value written is the one already present at 0x80007000, so nothing changes, same as in the figure above.
 
Finally, we jump back to the caller:
 332        mov     pc, lr
Control back to the following lines:
  91        /*
  92         * The following calls CPU specific code in a position independent
  93         * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
  94         * xxx_proc_info structure selected by __lookup_machine_type
  95         * above.  On return, the CPU will be ready for the MMU to be
  96         * turned on, and r0 will hold the CPU control register value.
  97         */
  98        ldr     r13, __switch_data              @ address to jump to after
  99                                                @ mmu has been enabled
 100        adr     lr, BSYM(__enable_mmu)          @ return (PIC) address
 101 ARM(   add     pc, r10, #PROCINFO_INITFUNC     )
 102 THUMB( add     r12, r10, #PROCINFO_INITFUNC    )
 103 THUMB( mov     pc, r12                         )
 104ENDPROC(stext)
R13 = @0xC0008118 after the ldr instruction (the virtual address of __mmap_switched, the first word of __switch_data; obtained from System.map)
R14 = @0x80008034 (the physical return address, __enable_mmu)
add pc, r10, #PROCINFO_INITFUNC (offset 0x10) then transfers control to the init entry inside the proc_info structure, which contains:
b      0x8000BA28
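For reference, PROCINFO_INITFUNC (0x10) is simply the byte offset of the init/flush entry inside struct proc_info_list. A sketch of how that offset falls out (fields abbreviated from arch/arm/include/asm/procinfo.h; later members are omitted and may differ between kernel versions):

#include <stdio.h>
#include <stddef.h>

/* Abbreviated proc_info_list: only the leading fields that matter
 * for the offset calculation are shown. */
struct proc_info_list {
    unsigned int  cpu_val;
    unsigned int  cpu_mask;
    unsigned long __cpu_mm_mmu_flags;   /* the flags read at offset #8 above */
    unsigned long __cpu_io_mmu_flags;
    unsigned long __cpu_flush;          /* the "initfunc": holds b __v7_setup */
    /* ... name strings, proc/tlb/user/cache method tables ... */
};

int main(void)
{
    /* On a 32-bit ARM build this prints 0x10. */
    printf("PROCINFO_INITFUNC = 0x%zx\n",
           offsetof(struct proc_info_list, __cpu_flush));
    return 0;
}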
 
We jump to the architecture-specific memory-management setup function. In our case, we end up in
linux/arch/arm/mm/proc-v7.S
adr     r12, __v7_setup_stack           @ the local stack
 200        stmia   r12, {r0-r5, r7, r9, r11, lr}
 201        bl      v7_flush_dcache_all
 
The bl then lands at 0x8002ACE0, in
linux/arch/arm/mm/cache-v7.S
 /*
  21 *      v7_flush_dcache_all()
  22 *
  23 *      Flush the whole D-cache.
  24 *
  25 *      Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
  26 *
  27 *      - mm    - mm_struct describing address space
  28 */
  29ENTRY(v7_flush_dcache_all)
  30        dmb                                     @ ensure ordering with previous memory accesses
  31        mrc     p15, 1, r0, c0, c0, 1           @ read clidr
  32        ands    r3, r0, #0x7000000              @ extract loc from clidr
  33        mov     r3, r3, lsr #23                 @ left align loc bit field
  34        beq     finished                        @ if loc is 0, then no need to clean
  35        mov     r10, #0                         @ start clean at cache level 0
Cache handling will be discussed in a separate post later. For now, we just run until we return.
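Briefly, the first instructions just extract the Level of Coherency (LoC) from CLIDR bits [26:24]; shifting right by #23 instead of #24 leaves LoC pre-multiplied by 2, which is the step size the per-level loop wants when writing CSSELR. A tiny sketch of the same arithmetic (the CLIDR value below is a made-up example, not one read from this board):

#include <stdio.h>

int main(void)
{
    unsigned int clidr = 0x0A000023;    /* example CLIDR value */

    /* ands r3, r0, #0x7000000 ; mov r3, r3, lsr #23 */
    unsigned int loc2 = (clidr & 0x7000000) >> 23;   /* LoC * 2 */

    printf("LoC = %u, loop limit (LoC*2) = %u\n", loc2 >> 1, loc2);
    /* The flush loop then walks r10 = 0, 2, 4, ... up to loc2,
     * selecting each cache level in turn. */
    return 0;
}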
The flush code runs and returns to __v7_setup:
ldmia   r12, {r0-r5, r7, r9, r11, lr}
 203
 204        mrc     p15, 0, r0, c0, c0, 0           @ read main ID register
 205        and     r10, r0, #0xff000000            @ ARM?
 206        teq     r10, #0x41000000
Here we read the processor ID (0x411FC083), extract the variant (0x00100000 >> 20 = 1) and revision (0x3), and store the combined
variant and revision (0x13) in R0. Next, we go to
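Decoding 0x411FC083 by hand (field positions are from the ARMv7 Main ID register layout; the masking mirrors what the assembly does):

#include <stdio.h>

int main(void)
{
    unsigned int midr = 0x411FC083;   /* Main ID register read at line #204 */

    unsigned int implementer = (midr >> 24) & 0xFF;   /* 0x41 = 'A' = ARM */
    unsigned int variant     = (midr >> 20) & 0xF;    /* 0x1 */
    unsigned int partnum     = (midr >> 4) & 0xFFF;   /* 0xC08 = Cortex-A8 */
    unsigned int revision    = midr & 0xF;            /* 0x3 */

    /* __v7_setup keeps variant and revision combined: 0x13 => r1p3 */
    printf("implementer=0x%02X variant=%u part=0x%03X rev=%u -> r%up%u\n",
           implementer, variant, partnum, revision, variant, revision);
    return 0;
}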
2:      mov     r10, #0
 234#ifdef HARVARD_CACHE
 235        mcr     p15, 0, r10, c7, c5, 0          @ I+BTB cache invalidate
 236#endif
 237        dsb
 238#ifdef CONFIG_MMU
 239        mcr     p15, 0, r10, c8, c7, 0          @ invalidate I + D TLBs
 240        mcr     p15, 0, r10, c2, c0, 2          @ TTB control register
 241        orr     r4, r4, #TTB_FLAGS
 242        mcr     p15, 0, r4, c2, c0, 1           @ load TTB1
 243        mov     r10, #0x1f                      @ domains 0, 1 = manager
 244        mcr     p15, 0, r10, c3, c0, 0          @ load domain access register
We skip line #235 (HARVARD_CACHE is not defined), then invalidate the I and D TLBs, load TTB1 with the page-table base ORed with TTB_FLAGS, and program the domain access register.
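The 0x1F written to the domain access control register packs one 2-bit access field per domain. A quick decode (0b01 = client, 0b11 = manager, per the ARM ARM):

#include <stdio.h>

int main(void)
{
    unsigned int dacr = 0x1F;    /* value loaded at line #243 */
    static const char *mode[] = { "no access", "client", "reserved", "manager" };

    for (unsigned int d = 0; d < 3; d++)    /* only the low domains are set */
        printf("domain %u: %s\n", d, mode[(dacr >> (2 * d)) & 0x3]);
    return 0;
}

This prints manager access for domains 0 and 1 (matching the kernel's own comment) and client access for domain 2.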
ldr     r5, =0xff0a81a8                 @ PRRR
 274        ldr     r6, =0x40e040e0                 @ NMRR
 275        mcr     p15, 0, r5, c10, c2, 0          @ write PRRR
 276        mcr     p15, 0, r6, c10, c2, 1          @ write NMRR
We also skip line #281.
 
mrc     p15, 0, r0, c1, c0, 0           @ read control register
 284        bic     r0, r0, r5                      @ clear bits them
 285        orr     r0, r0, r6                      @ set them
 286 THUMB( orr     r0, r0, #1 << 30        )       @ Thumb exceptions
 287        mov     pc, lr 
 
Finally, we return from the MMU configuration.
Note that we return to the address saved in R14, which is
__enable_mmu, as set up at line #100.
 
 /*
 156 * Setup common bits before finally enabling the MMU.  Essentially
 157 * this is just loading the page table pointer and domain access
 158 * registers.
 159 */
 160__enable_mmu:
 161#ifdef CONFIG_ALIGNMENT_TRAP
 162        orr     r0, r0, #CR_A
 163#else
 164        bic     r0, r0, #CR_A
 165#endif
We execute only line #162 (CONFIG_ALIGNMENT_TRAP is set) and continue at
 175        mov     r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
 176                      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 177                      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
 178                      domain_val(DOMAIN_IO, DOMAIN_CLIENT))
 179        mcr     p15, 0, r5, c3, c0, 0           @ load domain access register
 180        mcr     p15, 0, r4, c2, c0, 0           @ load page table pointer
 181        b       __turn_mmu_on
Line #175 ends up as
mov     r5, #0x1F
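That 0x1F falls straight out of the domain_val() macro, which places a 2-bit access type at the domain's slot. A worked check, using the domain numbers and access types from arch/arm/include/asm/domain.h:

#include <stdio.h>

/* From arch/arm/include/asm/domain.h */
#define DOMAIN_KERNEL  0
#define DOMAIN_TABLE   0
#define DOMAIN_USER    1
#define DOMAIN_IO      2

#define DOMAIN_CLIENT  1
#define DOMAIN_MANAGER 3

#define domain_val(dom, type) ((type) << ((dom) * 2))

int main(void)
{
    unsigned int r5 = domain_val(DOMAIN_USER,   DOMAIN_MANAGER) |
                      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) |
                      domain_val(DOMAIN_TABLE,  DOMAIN_MANAGER) |
                      domain_val(DOMAIN_IO,     DOMAIN_CLIENT);

    printf("r5 = 0x%X\n", r5);    /* prints 0x1F */
    return 0;
}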
Finally, branch to __turn_mmu_on
/*
 185 * Enable the MMU.  This completely changes the structure of the visible
 186 * memory space.  You will not be able to trace execution through this.
 187 * If you have an enquiry about this, *please* check the linux-arm-kernel
 188 * mailing list archives BEFORE sending another post to the list.
 189 *
 190 *  r0  = cp#15 control register
 191 *  r13 = *virtual* address to jump to upon completion
 192 *
 193 * other registers depend on the function called upon completion
 194 */
 195        .align  5
 196__turn_mmu_on:
 197        mov     r0, r0
 198        mcr     p15, 0, r0, c1, c0, 0           @ write control reg
 199        mrc     p15, 0, r3, c0, c0, 0           @ read id reg
 200        mov     r3, r3
 201        mov     r3, r13
 202        mov     pc, r3
 203ENDPROC(__turn_mmu_on)

**** Comment says it all! ****
 
Note that line #197 (mov r0, r0) is assembled as a NOP.
The mrc/mov pair at lines #199-#200 likewise exists only to keep the ARM core pipeline consistent while the control-register write takes effect.
So now, we are switching completely to !!** VM **!!
From this point onwards, we can use the debugger with source-code mapping, as the System.map addresses are now valid!
Immediately following line #202, we enter __mmap_switched, as this was the address stored in
R13 at line #98.
Fig: First line executed in VM, after returning from the above function
 
Now, we are running @__mmap_switched in /arch/arm/kernel/head-common.S

  32/*
  33 * The following fragment of code is executed with the MMU on in MMU mode,
  34 * and uses absolute addresses; this is not position independent.
  35 *
  36 *  r0  = cp#15 control register
  37 *  r1  = machine ID
  38 *  r2  = atags pointer
  39 *  r9  = processor ID
  40 */
  41__mmap_switched:
  42        adr     r3, __switch_data + 4
  43
  44        ldmia   r3!, {r4, r5, r6, r7}
  45        cmp     r4, r5                          @ Copy data segment if needed
  461:      cmpne   r5, r6
  47        ldrne   fp, [r4], #4
  48        strne   fp, [r5], #4
  49        bne     1b
  50
  51        mov     fp, #0                          @ Clear BSS (and zero fp)
  521:      cmp     r6, r7
  53        strcc   fp, [r6],#4
  54        bcc     1b
  55
  56 ARM(   ldmia   r3, {r4, r5, r6, r7, sp})
  57 THUMB( ldmia   r3, {r4, r5, r6, r7}    )
  58 THUMB( ldr     sp, [r3, #16]           )
  59        str     r9, [r4]                        @ Save processor ID
  60        str     r1, [r5]                        @ Save machine type
  61        str     r2, [r6]                        @ Save atags pointer
  62        bic     r4, r0, #CR_A                   @ Clear 'A' bit
  63        stmia   r7, {r0, r4}                    @ Save control register values
  64        b       start_kernel
Note that fp => R11.
In our case, the r4 and r5 values read from __switch_data are equal, so we do not execute
the data-copy loop but jump directly to line #51.
We do, however, loop to clear the BSS.
Finally, after saving the processor ID, machine type, atags pointer and control-register values, we jump to
start_kernel()
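A rough C equivalent of what this fragment does (a sketch only: I have flattened the __switch_data indirection into extern symbols and parameters, and left out the stack-pointer setup; the symbols normally come from the kernel linker script):

/* Rough C equivalent of __mmap_switched. Compiles as a standalone
 * translation unit, but the externs are kernel linker symbols. */
extern unsigned int __data_loc[], _data[], __bss_start[], _end[];
extern unsigned int processor_id, __machine_arch_type, __atags_pointer;
extern unsigned int cr_alignment[2];   /* { cr_alignment, cr_no_alignment } */
extern void start_kernel(void);

#define CR_A 0x2u                      /* alignment-trap bit in cp15 c1 */

void mmap_switched(unsigned int ctrl,  /* r0: cp15 control register */
                   unsigned int mach,  /* r1: machine ID            */
                   unsigned int atags, /* r2: atags pointer         */
                   unsigned int cpuid) /* r9: processor ID          */
{
    unsigned int *src = __data_loc, *dst = _data, *p;

    if (src != dst)                    /* copy .data only for XIP kernels */
        while (dst < __bss_start)
            *dst++ = *src++;

    for (p = __bss_start; p < _end; p++)    /* clear BSS */
        *p = 0;

    processor_id        = cpuid;
    __machine_arch_type = mach;
    __atags_pointer     = atags;
    cr_alignment[0]     = ctrl;
    cr_alignment[1]     = ctrl & ~CR_A;     /* 'A' bit cleared */

    start_kernel();
}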


