AbydOS开发日记 (5) - 基本内存映射
基本内存布局
由于大部分的 RV64 SoC 都将 DRAM 放置在 0x80000000 以上,其下的空间保留给 IO,如 QEMU 的 virt,其布局如下:
/*
 * Memory map of QEMU's RISC-V `virt` machine, indexed by region id.
 * Each entry presumably holds { base address, size } -- MemMapEntry itself is
 * not shown here, confirm against the QEMU sources.
 * Every region except VIRT_DRAM sits below 0x80000000; DRAM begins there.
 */
static const MemMapEntry virt_memmap[] = {
[VIRT_DEBUG] = { 0x0, 0x100 },
[VIRT_MROM] = { 0x1000, 0xf000 },
[VIRT_TEST] = { 0x100000, 0x1000 },
[VIRT_RTC] = { 0x101000, 0x1000 },
[VIRT_CLINT] = { 0x2000000, 0x10000 },
[VIRT_ACLINT_SSWI] = { 0x2F00000, 0x4000 },
[VIRT_PCIE_PIO] = { 0x3000000, 0x10000 },
[VIRT_PLATFORM_BUS] = { 0x4000000, 0x2000000 },
/* VIRT_PLIC and VIRT_APLIC_M are listed with the same base address (0xc000000). */
[VIRT_PLIC] = { 0xc000000, VIRT_PLIC_SIZE(VIRT_CPUS_MAX * 2) },
[VIRT_APLIC_M] = { 0xc000000, APLIC_SIZE(VIRT_CPUS_MAX) },
[VIRT_APLIC_S] = { 0xd000000, APLIC_SIZE(VIRT_CPUS_MAX) },
[VIRT_UART0] = { 0x10000000, 0x100 },
[VIRT_VIRTIO] = { 0x10001000, 0x1000 },
[VIRT_FW_CFG] = { 0x10100000, 0x18 },
[VIRT_FLASH] = { 0x20000000, 0x4000000 },
[VIRT_IMSIC_M] = { 0x24000000, VIRT_IMSIC_MAX_SIZE },
[VIRT_IMSIC_S] = { 0x28000000, VIRT_IMSIC_MAX_SIZE },
[VIRT_PCIE_ECAM] = { 0x30000000, 0x10000000 },
[VIRT_PCIE_MMIO] = { 0x40000000, 0x40000000 },
/* The second field is 0 here; presumably the DRAM size is decided elsewhere -- TODO confirm. */
[VIRT_DRAM] = { 0x80000000, 0x0 },
};
由此,我们可以简单地将低 2G 空间直接映射,然后将 (可能不连续的) DRAM 从 0x80000000 开始进行映射,最后把 DRAM 的一小部分重映射到 VMA 的高地址空间。如下表所示:
VMA Start | VMA End | PMA Start | Usage |
---|---|---|---|
0x0 | 0x7FFFFFFF | 0x0 | Directly mapped IO |
0x80000000 | - | 0x80000000,... | Mapped continuous kernel heap memory |
KernelStackEnd | VMA.LowerTop | (To be allocated) | Kernel stack |
MMU 控制实现
基础定义
基于上篇描述的 MMU 控制方法,首先做一点抽象,创建一个基类 MMUBase:
/**
 * @brief Abstract interface for controlling an MMU / address space.
 *
 * Concrete implementations provide the actual page-table handling.
 * Objects are used polymorphically, so the destructor is virtual: destroying
 * an implementation through an MMUBase pointer must run the derived
 * destructor.
 */
class MMUBase
{
  public:
    /// Protection flags accepted by map(); combine them with bitwise OR.
    static constexpr int PROT_NONE = 0, PROT_R = 1, PROT_W = 2, PROT_X = 4, PROT_U = 8, PROT_G = 16;

    virtual ~MMUBase() = default;

    /**
     * @brief Set MMU state
     * @note The function will take effect immediately!
     * @param enable true to enable, false to disable
     * @return true if success, false if failed
     */
    virtual bool enable(bool enable) = 0;

    /**
     * @brief switch the ASID to this
     * @note The function will not sfence!
     */
    virtual void switchASID() = 0;

    /**
     * @brief Map `size` bytes starting at virtual address `vaddr` onto physical address `paddr`.
     * @param prot bitwise OR of the PROT_* flags
     * @return 0 on success, a non-zero error code otherwise
     */
    virtual int map(uintptr_t vaddr, uintptr_t paddr, size_t size, int prot) = 0;

    /**
     * @brief Remove the mapping of `size` bytes starting at virtual address `vaddr`.
     * @return 0 on success, a non-zero error code otherwise
     */
    virtual int unmap(uintptr_t vaddr, size_t size) = 0;

    /// Make preceding map()/unmap() changes take effect (implementation defined).
    virtual void apply() = 0;

    /// Upper bound of the lower half of the virtual address space.
    virtual size_t getVMALowerTop() = 0;

    /// Lowest address of the upper half of the virtual address space.
    virtual size_t getVMAUpperBottom() = 0;
};
这个基类是操作 MMU 的接口,其后可以对应 RV32 或 RV64 的实现。目前先开发 RV64 的,又由于其有三种变体 (Sv39、Sv48、Sv57),因此实现一个专用基类 RV64MMUBase:
class RV64MMUBase : public MMUBase
{
public:
bool enable(bool enable)
{
if (enable)
{
csr_write(CSR_SATP, *(uint64_t *)(&_satp));
if (csr_read(CSR_SATP) != *(uint64_t *)(&_satp))
return false;
sfence_vma();
return true;
}
else
{
csr_write(CSR_SATP, 0);
sfence_vma();
return true;
}
}
void switchASID()
{
csr_write(CSR_SATP, *(uint64_t *)(&_satp));
// No need to sfence.vma
}
protected:
// 查看省略的代码
struct vaddr_t
{
uint64_t offset : 12;
uint64_t vpn0 : 9;
uint64_t vpn1 : 9;
uint64_t vpn2 : 9;
uint64_t vpn3 : 9;
uint64_t vpn4 : 9;
template <uint8_t sz> uint64_t getVPN(int level)
{
switch (level)
{
case 0:
if constexpr (sz == 39)
return vpn2;
else if constexpr (sz == 48)
return vpn3;
else if constexpr (sz == 57)
return vpn4;
else
return 0;
break;
case 1:
if constexpr (sz == 39)
return vpn1;
else if constexpr (sz == 48)
return vpn2;
else if constexpr (sz == 57)
return vpn3;
else
return 0;
break;
case 2:
if constexpr (sz == 39)
return vpn0;
else if constexpr (sz == 48)
return vpn1;
else if constexpr (sz == 57)
return vpn2;
else
return 0;
break;
case 3:
if constexpr (sz == 39)
return 0;
else if constexpr (sz == 48)
return vpn0;
else if constexpr (sz == 57)
return vpn1;
else
return 0;
break;
case 4:
if constexpr (sz == 39)
return 0;
else if constexpr (sz == 48)
return 0;
else if constexpr (sz == 57)
return vpn0;
else
return 0;
break;
default:
return 0;
break;
}
}
};
struct paddr_t
{
uint64_t offset : 12;
uint64_t ppn0 : 9;
uint64_t ppn1 : 9;
uint64_t ppn2 : 9;
uint64_t ppn3 : 9;
uint64_t ppn4 : 8;
uint64_t reserved : 8;
};
enum MMUMode_t
{
BARE = 0,
// 1-7 reserved for future use
SV39 = 8,
SV48 = 9,
SV57 = 10,
SV64 = 11, // Not defined in the current RISC-V specs
};
struct pte_t
{
uint64_t v : 1;
uint64_t r : 1;
uint64_t w : 1;
uint64_t x : 1;
uint64_t u : 1;
uint64_t g : 1;
uint64_t a : 1;
uint64_t d : 1;
uint64_t rsw : 2;
uint64_t ppn0 : 9;
uint64_t ppn1 : 9;
uint64_t ppn2 : 9;
uint64_t ppn3 : 9;
uint64_t ppn4 : 8;
uint64_t reserved : 10; // externsions off
// C++ 17 enabled!
template <uint8_t sz> auto fit()
{
this->reserved = 0;
if constexpr (sz <= 39)
this->ppn3 = 0;
if constexpr (sz <= 48)
this->ppn4 = 0;
return *this;
}
void ppn(uintptr_t addr)
{
auto paddr = (paddr_t *)&addr;
ppn0 = paddr->ppn0;
ppn1 = paddr->ppn1;
ppn2 = paddr->ppn2;
ppn3 = paddr->ppn3;
ppn4 = paddr->ppn4;
}
uintptr_t paddr()
{
return (*((uintptr_t *)this) << 2) & ~((0xFFFUL) + (0xFFUL << 56));
}
};
struct satp_t
{
uint64_t ppn : 44;
uint64_t asid : 16;
uint64_t mode : 4;
};
RV64MMUBase(MMUMode_t mode, uint16_t asid)
{
_satp.asid = asid;
_satp.mode = mode;
}
bool setPPN(uintptr_t addr)
{
if (addr & 4095)
return false;
_satp.ppn = addr2page4K(addr);
return true;
}
private: // data
satp_t _satp;
};
这里利用了 C++17 的 if constexpr 特性,对不同的实现上的 PTE 提供实质上不同的函数,而无需特化函数模板。并且,enable() 和 switchASID() 方法在该层实现。
接下来,编写一个模板类,同样利用 if constexpr 避免特化,实现 map 和 unmap 等。定义和一些简单方法如下:
template <uint8_t sz> class RV64MMU : public RV64MMUBase
{
private:
/// Translate the address width given as template argument into the matching
/// satp MODE value; widths other than 39/48/57/64 yield BARE.
static constexpr auto _mmutype()
{
    switch (sz)
    {
    case 39:
        return MMUMode_t::SV39;
    case 48:
        return MMUMode_t::SV48;
    case 57:
        return MMUMode_t::SV57;
    case 64:
        return MMUMode_t::SV64;
    default:
        return MMUMode_t::BARE;
    }
}
public:
/**
 * @brief Build an address space for `asid`: allocate the root page table and
 *        register its address as the satp page number.
 * @note NOTE(review): the result of setPPN() is not checked, and the rest of
 *       the class appears to expect the new table to be zero-filled -- confirm
 *       what alignedMalloc() guarantees.
 */
RV64MMU(uint16_t asid) : RV64MMUBase(_mmutype(), asid)
{
    constexpr size_t entriesPerTable = 512;
    constexpr size_t tableAlignment = 4096;
    _ptes = alignedMalloc<pte_t>(entriesPerTable * sizeof(pte_t), tableAlignment);
    setPPN((uintptr_t)_ptes);
}
// Releases the page-table page referenced by _ptes.
// NOTE(review): only _ptes is freed here; tables allocated for lower levels
// while mapping do not appear to be released by this destructor -- confirm
// whether they are reclaimed elsewhere.
~RV64MMU()
{
alignedFree(_ptes);
}
/// Size of the lower half of the virtual address space: 2^(sz-1) for
/// Sv39/Sv48/Sv57, 0 for any other width.
size_t getVMALowerTop() override
{
    if constexpr (sz == 39 || sz == 48 || sz == 57)
        return 1ULL << (sz - 1);
    else
        return 0;
}
/// First address of the upper half of the virtual address space, i.e.
/// 2^64 - 2^(sz-1) for Sv39/Sv48/Sv57; 0 for any other width.
size_t getVMAUpperBottom() override
{
    if constexpr (sz == 39 || sz == 48 || sz == 57)
    {
        // All ones above bit (sz-2), zeros below: same value as -1 - 2^(sz-1) + 1.
        constexpr unsigned long long lowerHalf = 1ULL << (sz - 1);
        return ~(lowerHalf - 1);
    }
    else
    {
        return 0;
    }
}
...
};
Map()
接下来实现 map()。首先考虑页表的创建,可以使用虚拟地址 vaddr 和当前页表级数 level 作为参数,递归创建 (注意标记遍历路径上的有效位):
/**
 * @brief Return the entry for `vaddr` in the table at depth `level`, creating
 *        every missing table on the way down (depth 0 is the root table).
 *
 * A parent entry whose physical address is 0 is taken to mean "no table yet":
 * a new 512-entry table is allocated and the parent is turned into a valid
 * pointer entry (V=1, R=W=X=0).
 *
 * NOTE(review): a parent that is already a leaf mapping is not detected; its
 * target address would be used as a table (or, for a target of 0, be
 * overwritten) -- confirm callers never mix block sizes on the same range.
 */
pte_t *_createPTE(int level, uintptr_t vaddr)
{
    const auto index = ((vaddr_t *)&vaddr)->getVPN<sz>(level);
    if (level == 0)
        return &_ptes[index]; // the root table always exists

    pte_t *const upper = _createPTE(level - 1, vaddr); // make sure the path above exists

    if (auto *existing = (pte_t *)(upper->paddr()); existing != nullptr)
        return existing + index; // table for this depth was created earlier

    auto *fresh = alignedMalloc<pte_t>(512 * sizeof(pte_t), 4096);
    upper->ppn((uintptr_t)fresh);
    upper->v = 1;
    // R = W = X = 0 marks the parent as a pointer to the next table.
    upper->r = 0;
    upper->w = 0;
    upper->x = 0;
    upper->template fit<sz>();
    return fresh + index;
}
然后实现一个整块 map 的函数,如下:
template <uint8_t blocksz> int _map(uintptr_t vaddr, uintptr_t paddr, int prot)
{
printf("* Mapping %lx to %lx with prot %i\n", vaddr, paddr, prot);
auto level = _calcLevel<blocksz>();
if (level < 0)
return level;
auto pte = _createPTE(level, vaddr);
// printf("PTE got: %lx\n", (uintptr_t)pte);
if (pte->v)
return K_EALREADY;
pte->v = 1;
pte->r = prot & PROT_R ? 1 : 0;
pte->w = prot & PROT_W ? 1 : 0;
pte->x = prot & PROT_X ? 1 : 0;
pte->u = prot & PROT_U ? 1 : 0;
pte->g = prot & PROT_G ? 1 : 0;
pte->ppn(paddr);
pte->template fit<sz>();
// printf("Now PTE value: %lx\n", *(uintptr_t *)pte);
return 0;
}
这里的 _calcLevel() 函数用于计算级数,接受块大小作为模板参数,也是一个 constexpr 函数。
最后实现 map():先做参数校验,然后从 vaddr 开始,循环从大到小匹配块大小并调用相应的 _map():
// Map `size` bytes at virtual address `vaddr` onto physical address `paddr`
// with protection `prot` (bitwise OR of PROT_* flags).
// Returns 0 on success or when size == 0; K_EINVAL when an argument is not
// 4 KiB aligned; K_EINVALID_ADDR when either range wraps around; K_ENOSPC for
// a rejected R/W/X combination; otherwise the first non-zero code returned by
// _map().
// The article elides part of the loop body ("..."), so only the largest block
// size case is visible here.
int map(uintptr_t vaddr, uintptr_t paddr, size_t size, int prot) override
{
if (vaddr & 0xFFF || paddr & 0xFFF || size & 0xFFF) // Not aligned
return K_EINVAL;
if (size == 0)
return 0; // No need to map with size == 0
// NOTE(review): a range that ends exactly at the top of the address space
// wraps to 0 and is therefore rejected as well -- confirm this is intended.
if (vaddr + size < vaddr || paddr + size < paddr) // overflow
return K_EINVALID_ADDR;
// Reject R/W/X combinations: none (0b000), W only (0b010), W+X without R (0b110).
// NOTE(review): K_ENOSPC looks like an unusual code for an invalid argument;
// K_EINVAL is used for the other argument checks above -- confirm.
auto prott = prot & (PROT_R | PROT_W | PROT_X);
if (prott == 0b000 || prott == 0b010 || prott == 0b110)
return K_ENOSPC;
auto rc = 0;
// Divide the memory into blocks of size 256T,512G, 1G, 2M, and 4K
for (uintptr_t vcaddr = vaddr, pcaddr = paddr; vcaddr < vaddr + size;)
{
// A failure of the previous _map() call is reported at the top of the next iteration.
if (rc)
return rc;
if constexpr (sz >= 57) // Only SV57 and SV64 support 256T
{
if ((vcaddr & 0xFFFFFFFFFFFF) == 0) // 256T aligned
{
if (size - (vcaddr - vaddr) >= 1ULL << 48) // There are more than 256T to map
{
rc = _map<48>(vcaddr, pcaddr, prot);
vcaddr += 1ULL << 48;
pcaddr += 1ULL << 48;
continue;
}
}
}
...
}
return rc;
}
Unmap()
对于 unmap ,与 map 相似,也是循环匹配块大小并删除 (V=0)。实现上,如果删除的不是一个 4K 块,就释放下级 PTE 簇的内存,也是通过递归实现:
/**
 * @brief Locate the entry for `vaddr` in the table at depth `level` without
 *        creating anything (depth 0 is the root table).
 *
 * NOTE(review): parent entries are followed without checking their valid bit,
 * so the result is only meaningful when the whole path already exists.
 */
pte_t *_getPTE(int level, uintptr_t vaddr)
{
    const auto index = ((vaddr_t *)&vaddr)->getVPN<sz>(level);
    pte_t *const table = (level == 0) ? _ptes : (pte_t *)(_getPTE(level - 1, vaddr)->paddr());
    return table + index;
}
int _removePTE(int level, uintptr_t vaddr)
{
auto pte = _getPTE(level, vaddr);
if (!pte->v)
return K_EALREADY;
pte->v = 0;
pte->ppn(0);
pte->template fit<sz>();
if (level != _getMaxLevel())
{ // Next level already unused, free it
auto pteBase = (pte_t *)(pte->paddr());
alignedFree(pteBase);
}
return (level == 0 ? 0 : _removePTE(level - 1, vaddr));
}
/**
 * @brief Remove one mapping of block size 2^blocksz bytes at `vaddr`.
 * @return the (negative) result of _calcLevel() if it fails, otherwise the
 *         result of _removePTE()
 */
template <uint8_t blocksz> int _unmap(uintptr_t vaddr)
{
    printf("* Unmapping %lx\n", vaddr);
    const auto depth = _calcLevel<blocksz>();
    return depth < 0 ? depth : _removePTE(depth, vaddr);
}
测试
// Precondition: the MMU has already been enabled through the object `sysmmu`.
// The test aliases one freshly allocated page at a second virtual address,
// checks that both views see the same data, then removes the alias.
auto a = alignedMalloc<long>(4096, 4096);
const uintptr_t originalAddr = (uintptr_t)a;
const uintptr_t mappedAddr = originalAddr | 0xFFFFFFC000000000; // alias address used for the second mapping
long *const mapped = (long *)mappedAddr;

printf("Original addr of a: 0x%lx\n", originalAddr);
*a = 1145141919810;
printf("Original value of a: %li\n", *a);

sysmmu->map(mappedAddr, originalAddr, 4096, MMUBase::PROT_W | MMUBase::PROT_R);
sysmmu->apply();
printf("Mapped addr of a: 0x%lx\n", mappedAddr);
printf("Mapped value of a: %li\n", *mapped);

*mapped = 1919810114514;
printf("We modified from mapped, now the original value is %li\n", *a);

printf("Now we unmap it\n");
sysmmu->unmap(mappedAddr, 4096);
sysmmu->apply();
printf("Original value of a: %li\n", *a);

// The alias no longer exists, so this store is expected to stop the program.
printf("Accessing to mapped a (hang!)\n");
*mapped = 1;
编译运行,典型的 log 如下:
Original addr of a: 0x802c4000
Original value of a: 1145141919810
* Mapping ffffffc0802c4000 to 802c4000 with prot 3
Mapped addr of a: 0xffffffc0802c4000
Mapped value of a: 1145141919810
We modified from mapped, now the original value is 1919810114514
Now we unmap it
* Unmapping ffffffc0802c4000
Original value of a: 1919810114514
Accessing to mapped a (hang!)
提示
此处代码仍存在一些问题,已经在新提交中修复,敬请参阅。