[2021]解读HyperPlatform的MMU(memory management unit)虚拟化 huoji 虚拟机,hypervisor,HyperPlatform,MMU,内存虚拟化 2021-05-28 983 次浏览 0 次点赞 ## MTRR windows和linux系统在启动的过程中,会分配一段叫做"低内存"(low-level-memory)的保留内存区域,地址范围是在0x1000-0xA0000 这一段区间内,用于各种bios的操作。在此期间有一个操作是bios会通过这里面的字段信息设置对应的物理内存的地址范围属性,这叫做 "MTRRs - Memory Type Range Registers"(内存类型范围寄存器) 为什么要这样做呢? 答案: 告诉CPU在解析或者说操作内存的时候应该用什么手段。一般情况下,内存都应该被设置为有cache来帮助CPU操作内存,这样的话整个系统的效率就会比较高。但是某些设备所使用的内存却比较特殊,比如说Frame buffer就是一种很好的例子。那么软件(一般情况下是BIOS)就需要配置CPU的相关寄存器来声明哪些范围的内存在处理的时候需要做什么样的动作(这就是类型)。总的来说它们允许CPU针对不同类型的内存进行优化,如RAM、ROM、帧缓冲内存(frame buffer)和内存映射I/O设备。这种做法也简化了系统硬件设计,省去了早期处理器上用于此功能的内存控制引脚。 根据intel手册,MTRR可以设置的内存类型有: UC(Uncacheable)、WC(Write Combining)、WT(Write-through)、WP(Write-protected)、WB(Writeback)。 MTRR机制允许在物理内存中定义多达96个内存范围(memory ranges),它定义了一系列的MSR,每个寄存器分别说明它所覆盖的那段内存的具体类型 ## MMU 现在的计算机基本上都带了一个叫做MMU(memory management unit,内存管理单元)的东西,这个东西你可以理解为内存管理机制(虽然有点偏离意思),它主要负责内存访问、读写权限控制、虚拟地址到物理地址转换。 **请注意这里有个小知识点,虚拟内存的块我们叫做页(page),而物理内存的块我们叫做帧(frame)** intel把虚拟内存分为一个一个4KB的页面(可以是其他大小),物理内存分为一个一个4KB的帧,之后在此基础上设置了TLB机制(Translation Lookaside Buffer,转译后备缓冲区,简单理解为是一种缓存地址转换结果的CPU缓存就行) 物理与虚拟地址转换图:  如果你还是不理解物理地址与虚拟地址这些的关系,可以参考一下大学课本《计算机组成原理》这本书,里面有详细的介绍 ## EPT EPT是intel的CPU的一个名词,全名 Extended Page Table(AMD的NPT跟这个差不多),这类技术统称SLAT(Second Level Address Translation),它最简单的理解就是: 以前: 虚拟地址 -> MMU(页表) -> 物理地址 EPT开启: guest虚拟地址 -> MMU(guest页表) -> guest物理地址 -> EPT -> 真正的物理地址 ## HyperPlatform的EPT实现 INTEL SDM手册定义的EPTP(EPT指针)结构如下: ```cpp Extended-Page-Table Pointer (EPTP) union EptPointer { ULONG64 all; struct { ULONG64 memory_type : 3; //!< [0:2] ULONG64 page_walk_length : 3; //!< [3:5] ULONG64 enable_accessed_and_dirty_flags : 1; //!< [6] ULONG64 reserved1 : 5; //!< [7:11] ULONG64 pml4_address : 36; //!< [12:48-1] ULONG64 reserved2 : 16; //!< [48:63] } fields; }; static_assert(sizeof(EptPointer) == 8, "Size check"); ``` HyperPlatform首先分配并清零了一块内存用于存放这个EptPointer(它属于HyperPlatform自己定义的ept_data结构),此外还分配了一页内存作为EPT的PML4: ```cpp // Allocate EptPointer const auto ept_poiner = reinterpret_cast(ExAllocatePoolWithTag( NonPagedPool, PAGE_SIZE, kHyperPlatformCommonPoolTag)); if (!ept_poiner) { ExFreePoolWithTag(ept_data, kHyperPlatformCommonPoolTag); return nullptr; } RtlZeroMemory(ept_poiner, PAGE_SIZE); // Allocate EPT_PML4 
and initialize EptPointer const auto ept_pml4 = reinterpret_cast(ExAllocatePoolWithTag( NonPagedPool, PAGE_SIZE, kHyperPlatformCommonPoolTag)); if (!ept_pml4) { ExFreePoolWithTag(ept_poiner, kHyperPlatformCommonPoolTag); ExFreePoolWithTag(ept_data, kHyperPlatformCommonPoolTag); return nullptr; } RtlZeroMemory(ept_pml4, PAGE_SIZE); ``` 之后填充这些字段: ```cpp ept_poiner->fields.memory_type = static_cast(EptpGetMemoryType(UtilPaFromVa(ept_pml4))); ept_poiner->fields.page_walk_length = kEptPageWalkLevel - 1; ept_poiner->fields.pml4_address = UtilPfnFromPa(UtilPaFromVa(ept_pml4)); //注意这里 ``` page_walk_length字段填的是EPT页表级数减1(即kEptPageWalkLevel - 1),这里用4级页表的原因是他只最多控制512G的内存;EptpGetMemoryType是一个函数,它通过MTRR获取我们申请的PML4所在内存的属性,然后赋值给memory_type: ```cpp // Returns a memory type based on MTRRs _Use_decl_annotations_ static memory_type EptpGetMemoryType( ULONG64 physical_address) { // Indicate that MTRR is not defined (as a default) UCHAR result_type = MAXUCHAR; // Looks for MTRR that includes the specified physical_address for (const auto mtrr_entry : g_eptp_mtrr_entries) { if (!mtrr_entry.enabled) { // Reached out the end of stored MTRRs break; } if (!UtilIsInBounds(physical_address, mtrr_entry.range_base, mtrr_entry.range_end)) { // This MTRR does not describe a memory type of the physical_address continue; } // See: MTRR Precedences if (mtrr_entry.fixedMtrr) { // If a fixed MTRR describes a memory type, it is priority result_type = mtrr_entry.type; break; } if (mtrr_entry.type == static_cast(memory_type::kUncacheable)) { // If a memory type is UC, it is priority. Do not continue to search as // UC has the highest priority result_type = mtrr_entry.type; break; } if (result_type == static_cast(memory_type::kWriteThrough) || mtrr_entry.type == static_cast(memory_type::kWriteThrough)) { if (result_type == static_cast(memory_type::kWriteBack)) { // If two or more MTRRs describes an over-wrapped memory region, and // one is WT and the other one is WB, use WT. 
However, look for other // MTRRs, as the other MTRR specifies the memory address as UC, which is // priority. result_type = static_cast(memory_type::kWriteThrough); continue; } } // Otherwise, processor behavior is undefined. We just use the last MTRR // describes the memory address. result_type = mtrr_entry.type; } // Use the default MTRR if no MTRR entry is found if (result_type == MAXUCHAR) { result_type = g_eptp_mtrr_default_type; } return static_cast(result_type); } ``` 这个mtrrs信息怎么来的?通过读前面所说的mtrr特定的寄存器内容来的: ```cpp // Reads and stores all MTRRs to set a correct memory type for EPT _Use_decl_annotations_ void EptInitializeMtrrEntries() { PAGED_CODE(); int index = 0; MtrrData *mtrr_entries = g_eptp_mtrr_entries; // Get and store the default memory type Ia32MtrrDefaultTypeMsr default_type = { UtilReadMsr64(Msr::kIa32MtrrDefType) }; g_eptp_mtrr_default_type = default_type.fields.default_mtemory_type; // Read MTRR capability Ia32MtrrCapabilitiesMsr mtrr_capabilities = { UtilReadMsr64(Msr::kIa32MtrrCap) }; HYPERPLATFORM_LOG_DEBUG( "MTRR Default=%lld, VariableCount=%lld, FixedSupported=%lld, FixedEnabled=%lld", default_type.fields.default_mtemory_type, mtrr_capabilities.fields.variable_range_count, mtrr_capabilities.fields.fixed_range_supported, default_type.fields.fixed_mtrrs_enabled); // Read fixed range MTRRs if supported if (mtrr_capabilities.fields.fixed_range_supported && default_type.fields.fixed_mtrrs_enabled) { static const auto k64kBase = 0x0; static const auto k64kManagedSize = 0x10000; static const auto k16kBase = 0x80000; static const auto k16kManagedSize = 0x4000; static const auto k4kBase = 0xC0000; static const auto k4kManagedSize = 0x1000; // The kIa32MtrrFix64k00000 manages 8 ranges of memory. The first range // starts at 0x0, and each range manages a 64k (0x10000) range. For example, // entry[0]: 0x0 : 0x10000 - 1 // entry[1]: 0x10000 : 0x20000 - 1 // ... 
// entry[7]: 0x70000 : 0x80000 - 1 ULONG64 offset = 0; Ia32MtrrFixedRangeMsr fixed_range = { UtilReadMsr64(Msr::kIa32MtrrFix64k00000) }; for (auto memory_type : fixed_range.fields.types) { // Each entry manages 64k (0x10000) length. ULONG64 base = k64kBase + offset; offset += k64kManagedSize; // Saves the MTRR mtrr_entries[index].enabled = true; mtrr_entries[index].fixedMtrr = true; mtrr_entries[index].type = memory_type; mtrr_entries[index].range_base = base; mtrr_entries[index].range_end = base + k64kManagedSize - 1; index++; } NT_ASSERT(k64kBase + offset == k16kBase); // kIa32MtrrFix16k80000 manages 8 ranges of memory. The first range starts // at 0x80000, and each range manages a 16k (0x4000) range. For example, // entry[0]: 0x80000 : 0x84000 - 1 // entry[1]: 0x88000 : 0x8C000 - 1 // ... // entry[7]: 0x9C000 : 0xA0000 - 1 // Also, subsequent memory ranges are managed by other MSR, // kIa32MtrrFix16kA0000, which manages 8 ranges of memory starting at // 0xA0000 in the same fashion. For example, // entry[0]: 0xA0000 : 0xA4000 - 1 // entry[1]: 0xA8000 : 0xAC000 - 1 // ... // entry[7]: 0xBC000 : 0xC0000 - 1 offset = 0; for (auto msr = static_cast(Msr::kIa32MtrrFix16k80000); msr <= static_cast(Msr::kIa32MtrrFix16kA0000); msr++) { fixed_range.all = UtilReadMsr64(static_cast(msr)); for (auto memory_type : fixed_range.fields.types) { // Each entry manages 16k (0x4000) length. ULONG64 base = k16kBase + offset; offset += k16kManagedSize; // Saves the MTRR mtrr_entries[index].enabled = true; mtrr_entries[index].fixedMtrr = true; mtrr_entries[index].type = memory_type; mtrr_entries[index].range_base = base; mtrr_entries[index].range_end = base + k16kManagedSize - 1; index++; } } NT_ASSERT(k16kBase + offset == k4kBase); // kIa32MtrrFix4kC0000 manages 8 ranges of memory. The first range starts // at 0xC0000, and each range manages a 4k (0x1000) range. For example, // entry[0]: 0xC0000 : 0xC1000 - 1 // entry[1]: 0xC1000 : 0xC2000 - 1 // ... 
// entry[7]: 0xC7000 : 0xC8000 - 1 // Also, subsequent memory ranges are managed by other MSRs such as // kIa32MtrrFix4kC8000, kIa32MtrrFix4kD0000, and kIa32MtrrFix4kF8000. Each // MSR manages 8 ranges of memory in the same fashion up to 0x100000. offset = 0; for (auto msr = static_cast(Msr::kIa32MtrrFix4kC0000); msr <= static_cast(Msr::kIa32MtrrFix4kF8000); msr++) { fixed_range.all = UtilReadMsr64(static_cast(msr)); for (auto memory_type : fixed_range.fields.types) { // Each entry manages 4k (0x1000) length. ULONG64 base = k4kBase + offset; offset += k4kManagedSize; // Saves the MTRR mtrr_entries[index].enabled = true; mtrr_entries[index].fixedMtrr = true; mtrr_entries[index].type = memory_type; mtrr_entries[index].range_base = base; mtrr_entries[index].range_end = base + k4kManagedSize - 1; index++; } } NT_ASSERT(k4kBase + offset == 0x100000); } // Read all variable range MTRRs for (auto i = 0; i < mtrr_capabilities.fields.variable_range_count; i++) { // Read MTRR mask and check if it is in use const auto phy_mask = static_cast(Msr::kIa32MtrrPhysMaskN) + i * 2; Ia32MtrrPhysMaskMsr mtrr_mask = { UtilReadMsr64(static_cast(phy_mask)) }; if (!mtrr_mask.fields.valid) { continue; } // Get a length this MTRR manages ULONG length; BitScanForward64(&length, mtrr_mask.fields.phys_mask * PAGE_SIZE); // Read MTRR base and calculate a range this MTRR manages const auto phy_base = static_cast(Msr::kIa32MtrrPhysBaseN) + i * 2; Ia32MtrrPhysBaseMsr mtrr_base = { UtilReadMsr64(static_cast(phy_base)) }; ULONG64 base = mtrr_base.fields.phys_base * PAGE_SIZE; ULONG64 end = base + (1ull << length) - 1; // Save it mtrr_entries[index].enabled = true; mtrr_entries[index].fixedMtrr = false; mtrr_entries[index].type = mtrr_base.fields.type; mtrr_entries[index].range_base = base; mtrr_entries[index].range_end = end; index++; } } ``` ps: 在intel sdm手册里说了"MTRR不影响EPT的操作",但是HyperPlatform还是这样写应该是为了严谨。(应该可以直接写一个WB而不用那么麻烦)  构造完EptPointer的结构后,给VMCS的EptPointer字段写入ept_pointer的值(即ept_pointer->all,而不是它的地址)即可: ``` error |= 
UtilVmWrite64(VmcsField::kEptPointer, EptGetEptPointer(processor_data->ept_data));//其实就是ept_data->ept_pointer->all ``` 之后如果你想操作EPT做一些特殊的事情,比如EPT HOOK之类的,就要像遍历原版PML4一样逐级遍历你设置的EPT_PML4,并在回调里对每个表项做相应的处理: ```cpp _Use_decl_annotations_ static bool EptpWalker( EptCommonEntry *table, ULONG table_level, pEptWalkerCallback callback, void* context) { bool ret = false; for (auto i = 0ul; i < 512; ++i) { const auto entry = table[i]; if (callback) { callback(&table[i], context); } if (table_level == 1) { continue; } if (entry.fields.physial_address) { const auto sub_table = reinterpret_cast( UtilVaFromPfn(entry.fields.physial_address)); switch (table_level) { case 4: // table == PML4, sub_table == PDPT case 3: // table == PDPT, sub_table == PDT case 2: // table == PDT, sub_table == PT ret = EptpWalker(sub_table, table_level - 1, callback, context); break; case 1: break; default: HYPERPLATFORM_COMMON_DBG_BREAK(); break; } } } return ret; } ``` 本文由 huoji 创作,采用 知识共享署名 3.0,可自由转载、引用,但需署名作者且注明文章出处。 点赞 0
还不快抢沙发