这一部分详细走读用qemu运行kernel时最初执行的代码:从qemu加载源码根目录下的vmlinux镜像开始,走完arch/arm/kernel/head.S的整个过程,直到跳转到start_kernel。使用的kernel版本仍是3.18。
上一章我们分析完了__create_page_tables,接下来就做开启mmu前的准备工作。
这一部分和ARM架构强相关,建议找ARMV7相关架构的文档一起看,我这里用的是DDI0406C_d_armv7ar_arm.pdf
指令(add pc, r10, #PROCINFO_INITFUNC)执行后,PC跳转到__v7_ca9mp_setup,随后进入__v7_setup相关的代码,这部分主要是初始化CP15寄存器。
关于CP15,它提供了一组寄存器和控制功能,用于管理系统的硬件资源和配置。这些资源包括内存管理、缓存控制、异常处理、系统保护和进程切换等。以下是CP15寄存器的主要功能:
- 系统控制:CP15包含一些用于控制系统行为的寄存器。例如,系统控制寄存器(SCTLR)用于控制MMU、cache、分支预测、对齐检查的使能以及异常向量表的位置等。
- 内存管理:ARM架构使用内存管理单元(MMU)来实现虚拟内存。CP15包含一组转换表基址寄存器(TTBRs),它们存储了物理内存中的页表的基地址。通过这些页表,MMU可以将虚拟地址转换为物理地址。此外,还有一些寄存器用于控制内存访问权限和缓存行为。
- 缓存和写缓冲控制:ARM处理器通常包含缓存和写缓冲区以提高性能。CP15包含一些用于控制这些结构的寄存器。例如,缓存类型寄存器(CTR)提供了关于缓存结构的信息,而缓存操作寄存器则允许软件执行诸如清除缓存、使缓存无效等操作。
- 异常和中断处理:ARM架构支持多种类型的异常和中断。CP15包含一些用于管理这些事件的寄存器。例如,向量基址寄存器(VBAR)存储了异常向量的基地址,中断状态寄存器(ISR)可以查询是否有挂起的IRQ、FIQ或外部abort;而中断的屏蔽与使能是通过CPSR的I/F位和中断控制器完成的,并不在CP15中。
- 系统保护和安全性:CP15提供了一些用于增强系统保护和安全性的功能。例如,安全配置寄存器(SCR)用于配置安全扩展相关的行为,而域访问控制寄存器(DACR)则允许软件为16个内存域分别设置访问权限(无访问/Client/Manager)。注意MCR/MRC是读写协处理器寄存器的指令,并不是寄存器;硬件调试相关的寄存器位于CP14。
- 进程切换:在多任务环境中,进程切换时操作系统需要更新CP15中与地址空间相关的寄存器,例如TTBR0(当前进程的页表基址)、CONTEXTIDR(包含ASID)以及线程ID寄存器(TPIDRURO/TPIDRURW等)。通用寄存器等其余上下文由操作系统保存在内存中,并不存放在CP15里。通过切换这些信息,操作系统可以在不同的进程之间切换,从而实现并发执行。
/*
* The following calls CPU specific code in a position independent
* manner. See arch/arm/mm/proc-*.S for details. r10 = base of
* xxx_proc_info structure selected by __lookup_processor_type
* above. On return, the CPU will be ready for the MMU to be
* turned on, and r0 will hold the CPU control register value.
*/
ldr r13, =__mmap_switched @ address to jump to after //r13 = 0x605e92e0,这是__mmap_switched的链接地址,也就是开启MMU后要跳转到的虚拟地址(见后文__enable_mmu前的寄存器说明)
@ mmu has been enabled
adr lr, BSYM(1f) @ return (PIC) address // 将标号1所在的地址0x60008074赋给lr寄存器,该处的指令是b __enable_mmu,所以CPU初始化函数返回后就会跳转到__enable_mmu
mov r8, r4 @ set TTBR1 to swapper_pg_dir //r8 赋值swapper_pg_dir,也就是一级页表开始的地址
ARM( add pc, r10, #PROCINFO_INITFUNC ) // PC = r10 + PROCINFO_INITFUNC,即proc_info结构中初始化函数入口所在的位置,那里存放的是一条跳转指令,最终跳转到__v7_ca9mp_setup
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( ret r12 )
1: b __enable_mmu
ENDPROC(stext)
.ltorg
#ifndef CONFIG_XIP_KERNEL
2: .long .
.long PAGE_OFFSET
#endif
...
/*
* __v7_setup
*
* Initialise TLB, Caches, and MMU state ready to switch the MMU
* on. Return in r0 the new CP15 C1 control register setting.
*
* This should be able to cover all ARMv7 cores.
*
* It is assumed that:
* - cache type register is implemented
*/
__v7_ca5mp_setup:
__v7_ca9mp_setup: //程序从这里继续执行
__v7_cr7mp_setup:
mov r10, #(1 << 0) @ Cache/TLB ops broadcasting //r10赋值1
b 1f
__v7_ca7mp_setup:
__v7_ca12mp_setup:
__v7_ca15mp_setup:
__v7_b15mp_setup:
__v7_ca17mp_setup:
mov r10, #0
1:
#ifdef CONFIG_SMP
ALT_SMP(mrc p15, 0, r0, c1, c0, 1) //这个不会执行,执行ALT_UP
ALT_UP(mov r0, #(1 << 6)) @ fake it for UP // r0 = 0x40
tst r0, #(1 << 6) @ SMP/nAMP mode enabled? // r0=0x40,所以CPSR中Z位为0
orreq r0, r0, #(1 << 6) @ Enable SMP/nAMP mode // 不执行
orreq r0, r0, r10 @ Enable CPU-specific SMP bits // 不执行
mcreq p15, 0, r0, c1, c0, 1 // 不执行
#endif
b __v7_setup
接下来,开始做__v7_setup。
__v7_setup:
adr r12, __v7_setup_stack @ the local stack //r12 = 0x6001d28c
stmia r12, {r0-r5, r7, r9, r11, lr} //刷新dcache前,将r0-r5, r7, r9, r11, lr这些寄存器的当前值保存到__v7_setup_stack,因为刷新dcache的过程会改写这些寄存器(lr也会被下面的bl覆盖)
bl v7_flush_dcache_louis //v7_flush_dcache_louis在下面展开分析
ldmia r12, {r0-r5, r7, r9, r11, lr} //刷新dcache完毕,将__v7_setup_stack的值复原到寄存器中
mrc p15, 0, r0, c0, c0, 0 @ read main ID register //r0读MIDR = 0x410fc090
and r10, r0, #0xff000000 @ ARM?
teq r10, #0x41000000 //从MIDR的Implementer字段中判断是否是ARM自己实现的。
bne 3f
and r5, r0, #0x00f00000 @ variant // r5存variant = 0x0
and r6, r0, #0x0000000f @ revision // r6存revision = 0x0
orr r6, r6, r5, lsr #20-4 @ combine variant and revision //r6 = 0x0
ubfx r0, r0, #4, #12 @ primary part number // 无符号位域提取:取r0从bit4开始的12个bit(即MIDR[15:4])赋值给r0,r0=0xc09
/* Cortex-A8 Errata */
ldr r10, =0x00000c08 @ Cortex-A8 primary part number //判断是否是Cortex-A8?
teq r0, r10
bne 2f //由于不是,跳转到2f
#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
teq r5, #0x00100000 @ only present in r1p*
mrceq p15, 0, r10, c1, c0, 1 @ read aux control register
orreq r10, r10, #(1 << 6) @ set IBE to 1
mcreq p15, 0, r10, c1, c0, 1 @ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_458693
...
#endif
#ifdef CONFIG_ARM_ERRATA_460075
...
#endif
b 3f
/* Cortex-A9 Errata */
2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number //判断是否是Cortex-A9?
teq r0, r10
bne 3f //是的,不跳转,执行下面Cortex-A9特定CPU才需要执行的逻辑。
#ifdef CONFIG_ARM_ERRATA_742230
...
#endif
#ifdef CONFIG_ARM_ERRATA_742231
...
#endif
#ifdef CONFIG_ARM_ERRATA_743622
...
#endif
#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
...
1:
#endif
/* Cortex-A15 Errata */
3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number//判断是否是Cortex-A15?
teq r0, r10
bne 4f //不是,跳转到4f
#ifdef CONFIG_ARM_ERRATA_773022
...
#endif
4: mov r10, #0
mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate //r10写入ICIALLU,设置cache invalidate
#ifdef CONFIG_MMU
mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs //r10写入TLBIALL,清除整个TLB
v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup, v7_ttb_setup宏展开见下面代码。TTBR0和TTBR1(Translation table base register)是页表基地址寄存器:当TTBCR.N不为0时,低地址部分(通常是用户空间)使用TTBR0,其余地址(通常是内核空间)使用TTBR1;这里TTBCR被写为0(N=0),所以所有地址转换都使用TTBR0,TTBR1只是被设置为swapper_pg_dir。
ldr r5, =PRRR @ PRRR //.equ PRRR, 0xff0a81a8
ldr r6, =NMRR @ NMRR // .equ NMRR, 0x40e040e0
mcr p15, 0, r5, c10, c2, 0 @ write PRRR //设置PRRR和NMRR
mcr p15, 0, r6, c10, c2, 1 @ write NMRR
#endif
dsb @ Complete invalidations //数据同步隔离
#ifndef CONFIG_ARM_THUMBEE //未定义CONFIG_ARM_THUMBEE,进入运行
mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE //r0 = 0x1031
and r0, r0, #(0xf << 12) @ ThumbEE enabled field
teq r0, #(1 << 12) @ check if ThumbEE is present //ID_PFR0[15:12]=0x1,说明实现了ThumbEE,teq结果相等,下面的bne不跳转,继续执行对TEEHBR/TEECR的初始化
bne 1f
mov r5, #0
mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0
mrc p14, 6, r0, c0, c0, 0 @ load TEECR
orr r0, r0, #1 @ set the 1st bit in order to //将TEECR的bit0设置1,写回TEECR,用来disable TEEHBR的用户空间访问。
mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access
1:
#endif
adr r5, v7_crval //v7_crval: crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
ldmia r5, {r5, r6} // r5 = 0x2120c302, r6=0x10c03c7d
ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables //CONFIG_CPU_ENDIAN_BE8未开启,这里代码不执行。CONFIG_CPU_ENDIAN_BE8宏表示开启大端字节序
#ifdef CONFIG_SWP_EMULATE //开启了对已废弃的SWP/SWPB指令的软件模拟:下面把SCTLR的SW位(bit10)加入要清除的位,使SWP/SWPB在硬件上不可用,执行时触发未定义指令异常,再由内核进行模拟
orr r5, r5, #(1 << 10) @ set SW bit in "clear"
bic r6, r6, #(1 << 10) @ clear it in "mmuset"
#endif
mrc p15, 0, r0, c1, c0, 0 @ read control register // 读取SCTLR,r0=0xc55070
bic r0, r0, r5 @ clear bits them // r0=0xc51070
orr r0, r0, r6 @ set them // r0 = 0x10c5387d
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
ret lr @ return to head.S:__ret 跳转到__enable_mmu
ENDPROC(__v7_setup)
//v7_ttb_setup宏展开
/*
* Macro for setting up the TTBRx and TTBCR registers.
* - \ttb0 and \ttb1 updated with the corresponding flags.
*/
.macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp // zero为r10, ttbr0, ttbr1为r4和r8,都为0x60004000
mcr p15, 0, \zero, c2, c0, 2 @ TTB control register //TTBCR写0(N=0),表示所有虚拟地址都通过TTBR0做转换。关于TTBCR,TTBR0,TTBR1的介绍,可以参考这两篇文章: https://blog.csdn.net/zhoutaopower/article/details/114293357 和 https://zhuanlan.zhihu.com/p/648096106
ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) //不执行
ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) // TTBR0 = R4 = 0x60004000 | 0x59
ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) // 不执行
ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) //TTBR1 = R8 = 0x60004000 | 0x59
mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1
.endm
这里TTBR0和TTBR1都是设置write back的,具体什么是write back可以参考这篇文档:https://blog.csdn.net/qq_33471732/article/details/135300137
/*
* v7_flush_dcache_louis()
*
* Flush the D-cache up to the Level of Unification Inner Shareable
*
* Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*/
ENTRY(v7_flush_dcache_louis)
dmb @ ensure ordering with previous memory accesses //数据存储器屏障,确保在新的存储器访问开始之前,所有的存储器访问已经完成。
mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr //将CLIDR读入R0,见下附1, R0=0x9000003
ALT_SMP(ands r3, r0, #(7 << 21)) @ extract LoUIS from clidr //ALT_SMP不执行
ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr // 取出CLIDR[29:27]的LoUU(Level of Unification Uniprocessor)放入r3(此时还未右移)。LoUU表示单核情况下需要清理到哪一级cache,才能保证取指、数据访问和页表遍历看到一致的内容,最多7级。这里LoUU=0x1,只需处理L1 cache
#ifdef CONFIG_ARM_ERRATA_643719
......
#endif
ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 //不执行
ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 // R3=0x2
reteq lr @ return if level == 0 // level不为0,不返回。
mov r10, #0 @ r10 (starting level) = 0 // r10=0,从0 开始清理
b flush_levels @ start flushing cache levels //开始刷新缓存操作,跳转到flush_levels
ENDPROC(v7_flush_dcache_louis)
附:
- CLIDR,也可参考(https://developer.arm.com/documentation/ddi0406/c/System-Level-Architecture/System-Control-Registers-in-a-VMSA-implementation/VMSA-System-control-registers-descriptions--in-register-order/CLIDR--Cache-Level-ID-Register--VMSA)
/*
* v7_flush_dcache_all()
*
* Flush the whole D-cache.
*
* Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
*
* - mm - mm_struct describing address space
*/
ENTRY(v7_flush_dcache_all)
dmb @ ensure ordering with previous memory accesses
mrc p15, 1, r0, c0, c0, 1 @ read clidr
ands r3, r0, #0x7000000 @ extract loc from clidr
mov r3, r3, lsr #23 @ left align loc bit field
beq finished @ if loc is 0, then no need to clean
mov r10, #0 @ start clean at cache level 0
flush_levels: // 从这里开始继续
add r2, r10, r10, lsr #1 @ work out 3x current cache level //r2=0+0>>1 = 0
mov r1, r0, lsr r2 @ extract cache type bits from clidr //r1=r0>>r2 = r0>>0= 0x9000003
and r1, r1, #7 @ mask of the bits for current cache only //r1=r1 & 7=0x3 ,这个就是CLIDR里的Ctypex
cmp r1, #2 @ see what cache we have at this level //与#2相比,不等
blt skip @ skip if no cache, or just i-cache //(blt)Branch if Less Than小于则跳转,这里不跳转。
#ifdef CONFIG_PREEMPT
...
#endif
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr// 写CSSELR(见下图)寄存器,写r10=0代表选择L1 cache
isb @ isb to sych the new cssr&csidr //指令同步
mrc p15, 1, r1, c0, c0, 0 @ read the new csidr //读CCSIDR(见下图)寄存器到r1, r1=0xe00fe019
#ifdef CONFIG_PREEMPT
......
#endif
and r2, r1, #7 @ extract the length of the cache lines // 得到r2=linesize=1,代表8words线宽
add r2, r2, #4 @ add 4 (line length offset) //r2=5
ldr r4, =0x3ff // r4=0x3ff。ARM指令的立即数只能是“8位数值循环右移偶数位”的形式,0x3ff无法这样编码,不能直接用mov,所以用ldr伪指令从文字池加载
ands r4, r4, r1, lsr #3 @ find maximum number on the way size 读出CCSIDR中的(Associativity - 1),r4 = 0x3,即最大way编号,表示4路组相联
clz r5, r4 @ find bit position of way size increment // r5 = 0x1e
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 @ extract max number of the index size 读出CCSIDR中的(NumSets - 1),r7 = 0x7f,即最大set编号,表示共128个set
loop1:
mov r9, r7 @ create working copy of max index
loop2:
ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 // r11 = r10 | 0xc0000000 = 0xc0000000
THUMB( lsl r6, r4, r5 )
THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11
ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 // r11 = r11 | 0xfe0 = 0xc0000fe0
THUMB( lsl r6, r9, r2 )
THUMB( orr r11, r11, r6 ) @ factor index number into r11
mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way //将r11写入DCCISW,做清理cache
subs r9, r9, #1 @ decrement the index //以r9也就是numSets 做循环清理
bge loop2
subs r4, r4, #1 @ decrement the way//r4也就是associativity 做循环清理
bge loop1
skip:
add r10, r10, #2 @ increment cache number // r10 = r10+2 = 0+2 = 2
cmp r3, r10 //比较r10和r3,如果有多级cache则再执行flush_levels多次清理,这里我们只有1级cache,所以这里直接等于后执行finished
bgt flush_levels
finished:
mov r10, #0 @ swith back to cache level 0
mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr 再次选回L1Cache,这里是对如果有清理过多级cache时,清理完选L1Cache。只清理L1Cache时,这步是冗余的。
dsb st //数据同步隔离
isb //指令同步隔离
ret lr //这个lr很关键,会返回到bl v7_flush_dcache_louis的下一条指令ldmia r12, {r0-r5, r7, r9, r11, lr},将这些寄存器恢复。
ENDPROC(v7_flush_dcache_all)
CSSELR:
CCSIDR:
MIDR:
备注: 在Cache中,numSets 表示缓存中总共有多少个集合。每个集合包含一个或多个缓存行。associativity
表示每个集合中有多少个缓存行。lineSize 则表示每个缓存行的大小。 numSets、associativity 和 lineSize
是缓存的重要参数,它们会影响缓存的性能和效率。例如,numSets
越大,缓存能够存储的数据就越多,但同时也会增加缓存的查找时间。associativity
越高,缓存的冲突率就越低,但也会增加缓存的成本。lineSize 则会影响缓存的带宽和延迟。
至此,我们做完了页表映射和cache清理,现在开始正式启动mmu:
/*
* Setup common bits before finally enabling the MMU. Essentially
* this is just loading the page table pointer and domain access
* registers.
*
* r0 = cp#15 control register //0x10c5387d
* r1 = machine ID // 0x8e0
* r2 = atags or dtb pointer //0x68000000
* r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)// 0x60004059
* r9 = processor ID //0x410fc090
* r13 = *virtual* address to jump to upon completion //0x605e92e0, __mmap_switched地址
*/
__enable_mmu:
#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6
orr r0, r0, #CR_A //不执行
#else
bic r0, r0, #CR_A //disable Alignment abort
#endif
#ifdef CONFIG_CPU_DCACHE_DISABLE
bic r0, r0, #CR_C //不执行
#endif
#ifdef CONFIG_CPU_BPREDICT_DISABLE
bic r0, r0, #CR_Z //不执行
#endif
#ifdef CONFIG_CPU_ICACHE_DISABLE
bic r0, r0, #CR_I //不执行
#endif
#ifndef CONFIG_ARM_LPAE
mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT)) //r5=0x15
mcr p15, 0, r5, c3, c0, 0 @ load domain access register
mcr p15, 0, r4, c2, c0, 0 @ load page table pointer
#endif
b __turn_mmu_on //跳转到__turn_mmu_on
ENDPROC(__enable_mmu)
再看下真正使能mmu的代码:
/*
* Enable the MMU. This completely changes the structure of the visible
* memory space. You will not be able to trace execution through this.
* If you have an enquiry about this, *please* check the linux-arm-kernel
* mailing list archives BEFORE sending another post to the list.
*
* r0 = cp#15 control register //0x10c5387d
* r1 = machine ID
* r2 = atags or dtb pointer
* r9 = processor ID
* r13 = *virtual* address to jump to upon completion
*
* other registers depend on the function called upon completion
*/
.align 5
.pushsection .idmap.text, "ax"
ENTRY(__turn_mmu_on)
mov r0, r0 //nop,清理流水线
instr_sync //isb,指令同步
mcr p15, 0, r0, c1, c0, 0 @ write control reg//写控制寄存器,真正开启mmu
mrc p15, 0, r3, c0, c0, 0 @ read id reg //r3=0x410fc090,只是随便读一个cp15寄存器,保证上一步写入成功
instr_sync
mov r3, r3
mov r3, r13
ret r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
.popsection
接下来是最后一步,跳转到start_kernel了:
/*
* The following fragment of code is executed with the MMU on in MMU mode,
* and uses absolute addresses; this is not position independent.
*
* r0 = cp#15 control register//0x10c5387d
* r1 = machine ID //0x8e0
* r2 = atags/dtb pointer //0x68000000
* r9 = processor ID // 0x410fc090
*/
__INIT
__mmap_switched:
adr r3, __mmap_switched_data //__mmap_switched_data 是一张地址表,开头四项依次保存了__data_loc,_sdata,__bss_start,_end这四个符号的地址。
ldmia r3!, {r4, r5, r6, r7} //将__data_loc,_sdata,__bss_start,_end这四个地址加载到r4~r7,同时r3后移0x10,分别为:
//r4 0x60628000 1617068032
//r5 0x60628000 1617068032
//r6 0x60658520 1617265952
//r7 0x6067eb34 1617423156
cmp r4, r5 @ Copy data segment if needed
1: cmpne r5, r6 //不执行
ldrne fp, [r4], #4 //不执行
strne fp, [r5], #4//不执行
bne 1b//不执行
mov fp, #0 @ Clear BSS (and zero fp)
1: cmp r6, r7 //将BSS段清零
strcc fp, [r6],#4 // 当r6 < r7时:[r6] = fp(即r11,值为0),然后r6 = r6+4
bcc 1b
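ARM( ldmia r3, {r4, r5, r6, r7, sp}) //从r3(此时指向__mmap_switched_data+0x10)继续加载:r4~r7分别得到processor_id、__machine_arch_type、__atags_pointer、cr_alignment的地址(见下面几条str指令),sp被设置为表中保存的内核初始栈栈顶(init_thread_union + THREAD_START_SP)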
THUMB( ldmia r3, {r4, r5, r6, r7} )
THUMB( ldr sp, [r3, #16] )
str r9, [r4] @ Save processor ID //将r9存放的processor ID存到processor_id的地址上
str r1, [r5] @ Save machine type // 将r1上存放的 machine type 放到__machine_arch_type的地址上
str r2, [r6] @ Save atags pointer// 将r2上存放的 atags pointer放到__atags_pointer的地址上
cmp r7, #0
strne r0, [r7] @ Save control register values // 如果r7非空,则将r0上的CP15系统控制寄存器值,存放到cr_alignment的地址上
b start_kernel //跳转到start_kernel
ENDPROC(__mmap_switched)
以上,我们就完成了清理cache,页表映射,并且使能mmu,最终跳转到start_kernel中。跳转之前,给C代码传递了processor_id,__machine_arch_type,__atags_pointer,cr_alignment这几个全局变量供C代码使用。